diff options
author | kmacy <kmacy@FreeBSD.org> | 2009-06-19 23:34:32 +0000 |
---|---|---|
committer | kmacy <kmacy@FreeBSD.org> | 2009-06-19 23:34:32 +0000 |
commit | 473a60e6d29146ca1471b85f279cacd49a31ff2c (patch) | |
tree | 7f1ac587e63f32f5ee1471191841af4f7a3b6f21 | |
parent | 6154623e0c7a2a355870e4a5ffacd4ec8e4ce8f9 (diff) | |
download | FreeBSD-src-473a60e6d29146ca1471b85f279cacd49a31ff2c.zip FreeBSD-src-473a60e6d29146ca1471b85f279cacd49a31ff2c.tar.gz |
Greatly simplify cxgb by removing almost all of the custom mbuf management logic
- remove mbuf iovec - useful, but adds too much complexity when isolated to
the driver
- remove driver private caching - insufficient benefit over UMA to justify
the added complexity and maintenance overhead
- remove separate logic for managing multiple transmit queues; with the
new drbr routines the control flow can be made to much more closely resemble
legacy drivers
- remove dedicated service threads; with per-cpu callouts one can get the same
benefit much more simply by registering a callout 1 tick in the future if there
are still buffered packets
- remove embedded mbuf usage - Jeffr's changes will (I hope) soon be integrated,
greatly reducing the overhead of using kernel APIs for reference counting
clusters
- add hysteresis to descriptor coalescing logic
- add coalesce threshold sysctls to allow users to decide at run-time
between optimizing for forwarding / UDP or optimizing for TCP
- add once per second watchdog to effectively close the very rare races
occurring from coalescing
- incorporate Navdeep's changes to the initialization path required to
convert port and adapter locks back to ordinary mutexes (silencing BPF
LOR complaints)
- enable prefetches in get_packet and tx cleaning
Reviewed by: navdeep@
MFC after: 2 weeks
-rw-r--r-- | sys/conf/files | 4 | ||||
-rw-r--r-- | sys/dev/cxgb/common/cxgb_t3_cpl.h | 21 | ||||
-rw-r--r-- | sys/dev/cxgb/cxgb_adapter.h | 119 | ||||
-rw-r--r-- | sys/dev/cxgb/cxgb_main.c | 681 | ||||
-rw-r--r-- | sys/dev/cxgb/cxgb_multiq.c | 594 | ||||
-rw-r--r-- | sys/dev/cxgb/cxgb_osdep.h | 21 | ||||
-rw-r--r-- | sys/dev/cxgb/cxgb_sge.c | 1217 | ||||
-rw-r--r-- | sys/dev/cxgb/sys/cxgb_support.c | 305 | ||||
-rw-r--r-- | sys/dev/cxgb/sys/mvec.h | 308 | ||||
-rw-r--r-- | sys/dev/cxgb/sys/uipc_mvec.c | 333 | ||||
-rw-r--r-- | sys/modules/cxgb/cxgb/Makefile | 2 |
11 files changed, 1136 insertions, 2469 deletions
diff --git a/sys/conf/files b/sys/conf/files index b074f86..19f1e2e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -756,8 +756,6 @@ dev/cxgb/cxgb_offload.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_sge.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" -dev/cxgb/cxgb_multiq.c optional cxgb pci \ - compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mc5.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \ @@ -776,8 +774,6 @@ dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" -dev/cxgb/sys/cxgb_support.c optional cxgb pci \ - compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cy/cy.c optional cy diff --git a/sys/dev/cxgb/common/cxgb_t3_cpl.h b/sys/dev/cxgb/common/cxgb_t3_cpl.h index b4b512f..b1dd564 100644 --- a/sys/dev/cxgb/common/cxgb_t3_cpl.h +++ b/sys/dev/cxgb/common/cxgb_t3_cpl.h @@ -237,10 +237,21 @@ struct rss_header { #ifndef CHELSIO_FW struct work_request_hdr { - __be32 wr_hi; - __be32 wr_lo; + union { + struct { + __be32 wr_hi; + __be32 wr_lo; + } ilp32; + struct { + __be64 wr_hilo; + } lp64; + } u; }; +#define wrh_hi u.ilp32.wr_hi +#define wrh_lo u.ilp32.wr_lo +#define wrh_hilo u.lp64.wr_hilo + /* wr_hi fields */ #define S_WR_SGE_CREDITS 0 #define M_WR_SGE_CREDITS 0xFF @@ -817,8 +828,7 @@ struct cpl_peer_close { }; struct tx_data_wr { - __be32 wr_hi; - __be32 wr_lo; + WR_HDR; __be32 len; __be32 flags; __be32 sndseq; @@ -936,8 +946,7 @@ struct cpl_rdma_ec_status { }; struct mngt_pktsched_wr { - __be32 wr_hi; - __be32 wr_lo; + WR_HDR; __u8 mngt_opcode; __u8 rsvd[7]; __u8 sched; diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h index 930c973..1e1e47a 100644 --- a/sys/dev/cxgb/cxgb_adapter.h +++ 
b/sys/dev/cxgb/cxgb_adapter.h @@ -35,7 +35,6 @@ $FreeBSD$ #include <sys/lock.h> #include <sys/mutex.h> -#include <sys/sx.h> #include <sys/rman.h> #include <sys/mbuf.h> #include <sys/socket.h> @@ -63,8 +62,6 @@ $FreeBSD$ #include <netinet/tcp_lro.h> #endif -#define USE_SX - struct adapter; struct sge_qset; extern int cxgb_debug; @@ -82,22 +79,9 @@ extern int cxgb_debug; mtx_destroy((lock)); \ } while (0) -#define SX_INIT(lock, lockname) \ - do { \ - printf("initializing %s at %s:%d\n", lockname, __FILE__, __LINE__); \ - sx_init((lock), lockname); \ - } while (0) - -#define SX_DESTROY(lock) \ - do { \ - printf("destroying %s at %s:%d\n", (lock)->lock_object.lo_name, __FILE__, __LINE__); \ - sx_destroy((lock)); \ - } while (0) #else #define MTX_INIT mtx_init #define MTX_DESTROY mtx_destroy -#define SX_INIT sx_init -#define SX_DESTROY sx_destroy #endif enum { @@ -110,20 +94,17 @@ struct port_info { struct adapter *adapter; struct ifnet *ifp; int if_flags; + int flags; const struct port_type_info *port_type; struct cphy phy; struct cmac mac; struct link_config link_config; struct ifmedia media; -#ifdef USE_SX - struct sx lock; -#else struct mtx lock; -#endif - uint8_t port_id; - uint8_t tx_chan; - uint8_t txpkt_intf; - uint8_t first_qset; + uint32_t port_id; + uint32_t tx_chan; + uint32_t txpkt_intf; + uint32_t first_qset; uint32_t nqsets; int link_fault; @@ -135,19 +116,30 @@ struct port_info { #define PORT_NAME_LEN 32 char lockbuf[PORT_LOCK_NAME_LEN]; char namebuf[PORT_NAME_LEN]; -}; +} __aligned(L1_CACHE_BYTES); -enum { /* adapter flags */ +enum { + /* adapter flags */ FULL_INIT_DONE = (1 << 0), USING_MSI = (1 << 1), USING_MSIX = (1 << 2), QUEUES_BOUND = (1 << 3), - FW_UPTODATE = (1 << 4), - TPS_UPTODATE = (1 << 5), + FW_UPTODATE = (1 << 4), + TPS_UPTODATE = (1 << 5), CXGB_SHUTDOWN = (1 << 6), CXGB_OFLD_INIT = (1 << 7), - TP_PARITY_INIT = (1 << 8), + TP_PARITY_INIT = (1 << 8), + CXGB_BUSY = (1 << 9), + + /* port flags */ + DOOMED = (1 << 0), }; +#define IS_DOOMED(p) 
(p->flags & DOOMED) +#define SET_DOOMED(p) do {p->flags |= DOOMED;} while (0) +#define DOOMED(p) (p->flags & DOOMED) +#define IS_BUSY(sc) (sc->flags & CXGB_BUSY) +#define SET_BUSY(sc) do {sc->flags |= CXGB_BUSY;} while (0) +#define CLR_BUSY(sc) do {sc->flags &= ~CXGB_BUSY;} while (0) #define FL_Q_SIZE 4096 #define JUMBO_Q_SIZE 1024 @@ -205,10 +197,6 @@ struct sge_rspq { uint32_t rspq_dump_count; }; -#ifndef DISABLE_MBUF_IOVEC -#define rspq_mbuf rspq_mh.mh_head -#endif - struct rx_desc; struct rx_sw_desc; @@ -253,7 +241,6 @@ struct sge_txq { bus_addr_t phys_addr; struct task qresume_task; struct task qreclaim_task; - struct port_info *port; uint32_t cntxt_id; uint64_t stops; uint64_t restarts; @@ -261,26 +248,21 @@ struct sge_txq { bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; struct mbuf_head sendq; - /* - * cleanq should really be an buf_ring to avoid extra - * mbuf touches - */ - struct mbuf_head cleanq; + struct buf_ring *txq_mr; struct ifaltq *txq_ifq; - struct mbuf *immpkt; - + struct callout txq_timer; + struct callout txq_watchdog; + uint64_t txq_coalesced; uint32_t txq_drops; uint32_t txq_skipped; - uint32_t txq_coalesced; uint32_t txq_enqueued; uint32_t txq_dump_start; uint32_t txq_dump_count; - unsigned long txq_frees; - struct mtx lock; + uint64_t txq_direct_packets; + uint64_t txq_direct_bytes; + uint64_t txq_frees; struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1]; - #define TXQ_NAME_LEN 32 - char lockbuf[TXQ_NAME_LEN]; }; @@ -297,6 +279,8 @@ enum { #define QS_EXITING 0x1 #define QS_RUNNING 0x2 #define QS_BOUND 0x4 +#define QS_FLUSHING 0x8 +#define QS_TIMEOUT 0x10 struct sge_qset { struct sge_rspq rspq; @@ -309,10 +293,10 @@ struct sge_qset { uint64_t port_stats[SGE_PSTAT_MAX]; struct port_info *port; int idx; /* qset # */ - int qs_cpuid; int qs_flags; + int coalescing; struct cv qs_cv; - struct mtx qs_mtx; + struct mtx lock; #define QS_NAME_LEN 32 char namebuf[QS_NAME_LEN]; }; @@ -328,7 +312,7 @@ struct adapter { device_t dev; int flags; 
TAILQ_ENTRY(adapter) adapter_entry; - + /* PCI register resources */ int regs_rid; struct resource *regs_res; @@ -401,11 +385,7 @@ struct adapter { char port_types[MAX_NPORTS + 1]; uint32_t open_device_map; uint32_t registered_device_map; -#ifdef USE_SX - struct sx lock; -#else struct mtx lock; -#endif driver_intr_t *cxgb_intr; int msi_count; @@ -422,31 +402,17 @@ struct t3_rx_mode { struct port_info *port; }; - #define MDIO_LOCK(adapter) mtx_lock(&(adapter)->mdio_lock) #define MDIO_UNLOCK(adapter) mtx_unlock(&(adapter)->mdio_lock) #define ELMR_LOCK(adapter) mtx_lock(&(adapter)->elmer_lock) #define ELMR_UNLOCK(adapter) mtx_unlock(&(adapter)->elmer_lock) -#ifdef USE_SX -#define PORT_LOCK(port) sx_xlock(&(port)->lock); -#define PORT_UNLOCK(port) sx_xunlock(&(port)->lock); -#define PORT_LOCK_INIT(port, name) SX_INIT(&(port)->lock, name) -#define PORT_LOCK_DEINIT(port) SX_DESTROY(&(port)->lock) -#define PORT_LOCK_ASSERT_OWNED(port) sx_assert(&(port)->lock, SA_LOCKED) - -#define ADAPTER_LOCK(adap) sx_xlock(&(adap)->lock); -#define ADAPTER_UNLOCK(adap) sx_xunlock(&(adap)->lock); -#define ADAPTER_LOCK_INIT(adap, name) SX_INIT(&(adap)->lock, name) -#define ADAPTER_LOCK_DEINIT(adap) SX_DESTROY(&(adap)->lock) -#define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) sx_assert(&(adap)->lock, SA_UNLOCKED) -#define ADAPTER_LOCK_ASSERT_OWNED(adap) sx_assert(&(adap)->lock, SA_LOCKED) -#else #define PORT_LOCK(port) mtx_lock(&(port)->lock); #define PORT_UNLOCK(port) mtx_unlock(&(port)->lock); #define PORT_LOCK_INIT(port, name) mtx_init(&(port)->lock, name, 0, MTX_DEF) #define PORT_LOCK_DEINIT(port) mtx_destroy(&(port)->lock) +#define PORT_LOCK_ASSERT_NOTOWNED(port) mtx_assert(&(port)->lock, MA_NOTOWNED) #define PORT_LOCK_ASSERT_OWNED(port) mtx_assert(&(port)->lock, MA_OWNED) #define ADAPTER_LOCK(adap) mtx_lock(&(adap)->lock); @@ -455,7 +421,6 @@ struct t3_rx_mode { #define ADAPTER_LOCK_DEINIT(adap) mtx_destroy(&(adap)->lock) #define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) mtx_assert(&(adap)->lock, 
MA_NOTOWNED) #define ADAPTER_LOCK_ASSERT_OWNED(adap) mtx_assert(&(adap)->lock, MA_OWNED) -#endif static __inline uint32_t @@ -555,14 +520,11 @@ void t3_sge_stop(adapter_t *); void t3b_intr(void *data); void t3_intr_msi(void *data); void t3_intr_msix(void *data); -int t3_encap(struct sge_qset *, struct mbuf **, int); int t3_sge_init_adapter(adapter_t *); int t3_sge_reset_adapter(adapter_t *); int t3_sge_init_port(struct port_info *); -void t3_sge_deinit_sw(adapter_t *); -void t3_free_tx_desc(struct sge_txq *q, int n); -void t3_free_tx_desc_all(struct sge_txq *q); +void t3_free_tx_desc(struct sge_qset *qs, int n, int qid); void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad); @@ -615,13 +577,8 @@ static inline int offload_running(adapter_t *adapter) return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); } -int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m); -int cxgb_pcpu_transmit(struct ifnet *ifp, struct mbuf *m); -void cxgb_pcpu_shutdown_threads(struct adapter *sc); -void cxgb_pcpu_startup_threads(struct adapter *sc); - -int process_responses(adapter_t *adap, struct sge_qset *qs, int budget); -void t3_free_qset(adapter_t *sc, struct sge_qset *q); +void cxgb_tx_watchdog(void *arg); +int cxgb_transmit(struct ifnet *ifp, struct mbuf *m); +void cxgb_qflush(struct ifnet *ifp); void cxgb_start(struct ifnet *ifp); -void refill_fl_service(adapter_t *adap, struct sge_fl *fl); #endif diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index cd7ed7c..49582e7 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -84,10 +84,12 @@ __FBSDID("$FreeBSD$"); static int cxgb_setup_interrupts(adapter_t *); static void cxgb_teardown_interrupts(adapter_t *); +static int cxgb_begin_op(struct port_info *, const char *); +static int cxgb_begin_detach(struct port_info *); +static int cxgb_end_op(struct port_info *); static void cxgb_init(void *); -static void cxgb_init_locked(struct port_info *); -static void 
cxgb_stop_locked(struct port_info *); -static void cxgb_set_rxmode(struct port_info *); +static int cxgb_init_synchronized(struct port_info *); +static int cxgb_uninit_synchronized(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgb_media_change(struct ifnet *); static int cxgb_ifm_type(int); @@ -96,7 +98,6 @@ static int setup_sge_qsets(adapter_t *); static void cxgb_async_intr(void *); static void cxgb_ext_intr_handler(void *, int); static void cxgb_tick_handler(void *, int); -static void cxgb_down_locked(struct adapter *sc); static void cxgb_tick(void *); static void setup_rss(adapter_t *sc); @@ -114,7 +115,6 @@ static int cxgb_get_regs_len(void); static int offload_open(struct port_info *pi); static void touch_bars(device_t dev); static int offload_close(struct t3cdev *tdev); -static void cxgb_link_start(struct port_info *p); int t3_detect_link_fault(adapter_t *adapter, int port_id); static device_method_t cxgb_controller_methods[] = { @@ -722,29 +722,47 @@ cxgb_free(struct adapter *sc) sc->flags |= CXGB_SHUTDOWN; ADAPTER_UNLOCK(sc); - cxgb_pcpu_shutdown_threads(sc); - - ADAPTER_LOCK(sc); - cxgb_down_locked(sc); - ADAPTER_UNLOCK(sc); - - t3_sge_deinit_sw(sc); /* - * Wait for last callout + * Make sure all child devices are gone. */ - - DELAY(hz*100); - bus_generic_detach(sc->dev); - for (i = 0; i < (sc)->params.nports; i++) { if (sc->portdev[i] && device_delete_child(sc->dev, sc->portdev[i]) != 0) device_printf(sc->dev, "failed to delete child port\n"); } - cxgb_teardown_interrupts(sc); + /* + * At this point, it is as if cxgb_port_detach has run on all ports, and + * cxgb_down has run on the adapter. All interrupts have been silenced, + * all open devices have been closed. 
+ */ + KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)", + __func__, sc->open_device_map)); + for (i = 0; i < sc->params.nports; i++) { + KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!", + __func__, i)); + } + /* + * Finish off the adapter's callouts. + */ + callout_drain(&sc->cxgb_tick_ch); + callout_drain(&sc->sge_timer_ch); + + /* + * Release resources grabbed under FULL_INIT_DONE by cxgb_up. The + * sysctls are cleaned up by the kernel linker. + */ + if (sc->flags & FULL_INIT_DONE) { + t3_free_sge_resources(sc); + sc->flags &= ~FULL_INIT_DONE; + } + + /* + * Release all interrupt resources. + */ + cxgb_teardown_interrupts(sc); #ifdef MSI_SUPPORTED if (sc->flags & (USING_MSI | USING_MSIX)) { device_printf(sc->dev, "releasing msi message(s)\n"); @@ -759,26 +777,26 @@ cxgb_free(struct adapter *sc) } #endif + /* + * Free the adapter's taskqueue. + */ if (sc->tq != NULL) { taskqueue_free(sc->tq); sc->tq = NULL; } if (is_offload(sc)) { + clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT); cxgb_adapter_unofld(sc); - if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT)) - offload_close(&sc->tdev); - else - printf("cxgb_free: DEVMAP_BIT not set\n"); - } else - printf("not offloading set\n"); + } + #ifdef notyet if (sc->flags & CXGB_OFLD_INIT) cxgb_offload_deactivate(sc); #endif free(sc->filters, M_DEVBUF); t3_sge_free(sc); - + cxgb_offload_exit(); if (sc->udbs_res != NULL) @@ -1052,10 +1070,9 @@ cxgb_port_attach(device_t dev) } ether_ifattach(ifp, p->hw_addr); + ifp->if_transmit = cxgb_transmit; + ifp->if_qflush = cxgb_qflush; -#ifdef IFNET_MULTIQUEUE - ifp->if_transmit = cxgb_pcpu_transmit; -#endif /* * Only default to jumbo frames on 10GigE */ @@ -1112,15 +1129,8 @@ cxgb_port_attach(device_t dev) ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO); } - /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN); t3_sge_init_port(p); -#if defined(LINK_ATTACH) - cxgb_link_start(p); - 
t3_link_changed(sc, p->port_id); -#endif - return (err); } @@ -1130,46 +1140,38 @@ cxgb_port_attach(device_t dev) * removing the device from the view of the kernel, i.e. from all * interfaces lists etc. This routine is only called when the driver is * being unloaded, not when the link goes down. - * */ static int cxgb_port_detach(device_t dev) { struct port_info *p; struct adapter *sc; + int i; p = device_get_softc(dev); sc = p->adapter; + cxgb_begin_detach(p); + if (p->port_cdev != NULL) destroy_dev(p->port_cdev); - + + cxgb_uninit_synchronized(p); ether_ifdetach(p->ifp); - PORT_LOCK(p); - if (p->ifp->if_drv_flags & IFF_DRV_RUNNING) - cxgb_stop_locked(p); - PORT_UNLOCK(p); - - callout_drain(&sc->cxgb_tick_ch); - callout_drain(&sc->sge_timer_ch); - - if (sc->tq != NULL) { - printf("draining slow intr\n"); - - taskqueue_drain(sc->tq, &sc->slow_intr_task); - printf("draining ext intr\n"); - taskqueue_drain(sc->tq, &sc->ext_intr_task); - printf("draining tick task\n"); - taskqueue_drain(sc->tq, &sc->tick_task); + for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { + struct sge_qset *qs = &sc->sge.qs[i]; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + + callout_drain(&txq->txq_watchdog); + callout_drain(&txq->txq_timer); } - /* - * the lock may be acquired in ifdetach - */ PORT_LOCK_DEINIT(p); if_free(p->ifp); - + p->ifp = NULL; + + cxgb_end_op(p); return (0); } @@ -1276,12 +1278,16 @@ t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc) { struct port_info *pi = &adapter->port[port_id]; + struct ifnet *ifp = pi->ifp; + + /* no race with detach, so ifp should always be good */ + KASSERT(ifp, ("%s: if detached.", __func__)); if (link_status) { - pi->ifp->if_baudrate = IF_Mbps(speed); - if_link_state_change(pi->ifp, LINK_STATE_UP); + ifp->if_baudrate = IF_Mbps(speed); + if_link_state_change(ifp, LINK_STATE_UP); } else - if_link_state_change(pi->ifp, LINK_STATE_DOWN); + if_link_state_change(ifp, LINK_STATE_DOWN); } /** 
@@ -1325,13 +1331,13 @@ t3_os_ext_intr_handler(adapter_t *sc) * interrupts in the meantime and let the task reenable them when * it's done. */ - ADAPTER_LOCK(sc); if (sc->slow_intr_mask) { + ADAPTER_LOCK(sc); sc->slow_intr_mask &= ~F_T3DBG; t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); taskqueue_enqueue(sc->tq, &sc->ext_intr_task); + ADAPTER_UNLOCK(sc); } - ADAPTER_UNLOCK(sc); } void @@ -1348,21 +1354,19 @@ t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]) bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN); } -/** - * link_start - enable a port - * @p: the port to enable - * - * Performs the MAC and PHY actions needed to enable a port. +/* + * Programs the XGMAC based on the settings in the ifnet. These settings + * include MTU, MAC address, mcast addresses, etc. */ static void -cxgb_link_start(struct port_info *p) +cxgb_update_mac_settings(struct port_info *p) { - struct ifnet *ifp; + struct ifnet *ifp = p->ifp; struct t3_rx_mode rm; struct cmac *mac = &p->mac; int mtu, hwtagging; - ifp = p->ifp; + PORT_LOCK_ASSERT_OWNED(p); bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN); @@ -1372,15 +1376,11 @@ cxgb_link_start(struct port_info *p) hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0; - t3_init_rx_mode(&rm, p); - if (!mac->multiport) - t3_mac_reset(mac); t3_mac_set_mtu(mac, mtu); t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging); t3_mac_set_address(mac, 0, p->hw_addr); + t3_init_rx_mode(&rm, p); t3_mac_set_rx_mode(mac, &rm); - t3_link_start(&p->phy, mac, &p->link_config); - t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); } @@ -1415,7 +1415,7 @@ init_tp_parity(struct adapter *adap) req = mtod(m, struct cpl_smt_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i)); req->iff = i; t3_mgmt_tx(adap, m); 
@@ -1428,7 +1428,7 @@ init_tp_parity(struct adapter *adap) req = mtod(m, struct cpl_l2t_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i)); req->params = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); @@ -1441,7 +1441,7 @@ init_tp_parity(struct adapter *adap) req = mtod(m, struct cpl_rte_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i)); req->l2t_idx = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); @@ -1451,7 +1451,7 @@ init_tp_parity(struct adapter *adap) greq = mtod(m, struct cpl_set_tcb_field *); m->m_len = m->m_pkthdr.len = sizeof(*greq); memset(greq, 0, sizeof(*greq)); - greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0)); greq->mask = htobe64(1); t3_mgmt_tx(adap, m); @@ -1532,7 +1532,7 @@ write_smt_entry(struct adapter *adapter, int idx) req = mtod(m, struct cpl_smt_write_req *); m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx)); req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */ req->iff = idx; @@ -1559,10 +1559,8 @@ init_smt(struct adapter *adapter) static void init_port_mtus(adapter_t *adapter) { - unsigned int mtus = adapter->port[0].ifp->if_mtu; + unsigned int mtus = ETHERMTU | (ETHERMTU << 16); - if (adapter->port[1].ifp) - mtus |= adapter->port[1].ifp->if_mtu << 16; t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus); } @@ -1576,7 
+1574,7 @@ send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, m = m_gethdr(M_DONTWAIT, MT_DATA); if (m) { req = mtod(m, struct mngt_pktsched_wr *); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); + req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET; req->sched = sched; req->idx = qidx; @@ -1593,7 +1591,6 @@ bind_qsets(adapter_t *sc) { int i, j; - cxgb_pcpu_startup_threads(sc); for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); @@ -1717,14 +1714,20 @@ cxgb_up(struct adapter *sc) { int err = 0; + ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)", + __func__, sc->open_device_map)); + if ((sc->flags & FULL_INIT_DONE) == 0) { if ((sc->flags & FW_UPTODATE) == 0) if ((err = upgrade_fw(sc))) goto out; + if ((sc->flags & TPS_UPTODATE) == 0) if ((err = update_tpsram(sc))) goto out; + err = t3_init_hw(sc, 0); if (err) goto out; @@ -1756,78 +1759,53 @@ cxgb_up(struct adapter *sc) sc->flags |= TP_PARITY_INIT; if (sc->flags & TP_PARITY_INIT) { - t3_write_reg(sc, A_TP_INT_CAUSE, - F_CMCACHEPERR | F_ARPLUTPERR); + t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR); t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff); } - if (!(sc->flags & QUEUES_BOUND)) { bind_qsets(sc); sc->flags |= QUEUES_BOUND; } + + t3_sge_reset_adapter(sc); out: return (err); } - /* - * Bring down the interface but do not free any resources. + * Called when the last open device is closed. Does NOT undo all of cxgb_up's + * work. Specifically, the resources grabbed under FULL_INIT_DONE are released + * during controller_detach, not here. 
*/ static void -cxgb_down_locked(struct adapter *sc) +cxgb_down(struct adapter *sc) { - + ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + t3_sge_stop(sc); t3_intr_disable(sc); - - callout_stop(&sc->cxgb_tick_ch); - callout_stop(&sc->sge_timer_ch); } static int offload_open(struct port_info *pi) { - struct adapter *adapter = pi->adapter; - struct t3cdev *tdev = &adapter->tdev; + struct adapter *sc = pi->adapter; + struct t3cdev *tdev = &sc->tdev; - int adap_up = adapter->open_device_map & PORT_MASK; - int err = 0; + ADAPTER_LOCK_ASSERT_NOTOWNED(sc); - if (atomic_cmpset_int(&adapter->open_device_map, - (adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)), - (adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0) - return (0); + setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT); - if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) - printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", - adapter->open_device_map); - ADAPTER_LOCK(pi->adapter); - if (!adap_up) - err = cxgb_up(adapter); - ADAPTER_UNLOCK(pi->adapter); - if (err) - return (err); - - t3_tp_set_offload_mode(adapter, 1); + t3_tp_set_offload_mode(sc, 1); tdev->lldev = pi->ifp; - - init_port_mtus(adapter); - t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd, - adapter->params.b_wnd, - adapter->params.rev == 0 ? - adapter->port[0].ifp->if_mtu : 0xffff); - init_smt(adapter); - /* Call back all registered clients */ + init_port_mtus(sc); + t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd, + sc->params.rev == 0 ? 
sc->port[0].ifp->if_mtu : 0xffff); + init_smt(sc); cxgb_add_clients(tdev); - /* restore them in case the offload module has changed them */ - if (err) { - t3_tp_set_offload_mode(adapter, 0); - clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); - cxgb_set_dummy_ops(tdev); - } - return (err); + return (0); } static int @@ -1844,147 +1822,220 @@ offload_close(struct t3cdev *tdev) tdev->lldev = NULL; cxgb_set_dummy_ops(tdev); t3_tp_set_offload_mode(adapter, 0); + clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); - ADAPTER_LOCK(adapter); - if (!adapter->open_device_map) - cxgb_down_locked(adapter); + return (0); +} + +/* + * Begin a synchronized operation. If this call succeeds, it is guaranteed that + * no one will remove the port or its ifp from underneath the caller. Caller is + * also granted exclusive access to open_device_map. + * + * operation here means init, uninit, detach, and ioctl service. + * + * May fail. + * EINTR (ctrl-c pressed during ifconfig for example). + * ENXIO (port is about to detach - due to kldunload for example). + */ +int +cxgb_begin_op(struct port_info *p, const char *wmsg) +{ + int rc = 0; + struct adapter *sc = p->adapter; + + ADAPTER_LOCK(sc); + + while (!IS_DOOMED(p) && IS_BUSY(sc)) { + if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, wmsg, 0)) { + rc = EINTR; + goto done; + } + } + + if (IS_DOOMED(p)) + rc = ENXIO; + else if (!IS_BUSY(sc)) + SET_BUSY(sc); + else { + KASSERT(0, ("%s: port %d, p->flags = %x , sc->flags = %x", + __func__, p->port_id, p->flags, sc->flags)); + rc = EDOOFUS; + } + +done: + ADAPTER_UNLOCK(sc); + return (rc); +} - ADAPTER_UNLOCK(adapter); +/* + * End a synchronized operation. Read comment block above cxgb_begin_op. + */ +int +cxgb_end_op(struct port_info *p) +{ + struct adapter *sc = p->adapter; + + ADAPTER_LOCK(sc); + KASSERT(IS_BUSY(sc), ("%s: not busy.", __func__)); + CLR_BUSY(sc); + wakeup_one(&sc->flags); + ADAPTER_UNLOCK(sc); return (0); } +/* + * Prepare for port detachment. 
Detach is a special kind of synchronized + * operation. Also read comment before cxgb_begin_op. + */ +static int +cxgb_begin_detach(struct port_info *p) +{ + struct adapter *sc = p->adapter; + /* + * Inform those waiting for this port that it is going to be destroyed + * and they should not continue further. (They'll return with ENXIO). + */ + ADAPTER_LOCK(sc); + SET_DOOMED(p); + wakeup(&sc->flags); + ADAPTER_UNLOCK(sc); + + /* + * Wait for in-progress operations. + */ + ADAPTER_LOCK(sc); + while (IS_BUSY(sc)) { + mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0); + } + SET_BUSY(sc); + ADAPTER_UNLOCK(sc); + + return (0); +} + +/* + * if_init for cxgb ports. + */ static void cxgb_init(void *arg) { struct port_info *p = arg; - PORT_LOCK(p); - cxgb_init_locked(p); - PORT_UNLOCK(p); + if (cxgb_begin_op(p, "cxgbinit")) + return; + + cxgb_init_synchronized(p); + cxgb_end_op(p); } -static void -cxgb_init_locked(struct port_info *p) +static int +cxgb_init_synchronized(struct port_info *p) { - struct ifnet *ifp; - adapter_t *sc = p->adapter; - int err; - - PORT_LOCK_ASSERT_OWNED(p); - ifp = p->ifp; + struct adapter *sc = p->adapter; + struct ifnet *ifp = p->ifp; + struct cmac *mac = &p->mac; + int i, rc; - ADAPTER_LOCK(p->adapter); - if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) { - ADAPTER_UNLOCK(p->adapter); - cxgb_stop_locked(p); - return; - } - if (p->adapter->open_device_map == 0) { - t3_intr_clear(sc); - } - setbit(&p->adapter->open_device_map, p->port_id); - ADAPTER_UNLOCK(p->adapter); + if (sc->open_device_map == 0) { + if ((rc = cxgb_up(sc)) != 0) + return (rc); - if (is_offload(sc) && !ofld_disable) { - err = offload_open(p); - if (err) + if (is_offload(sc) && !ofld_disable && offload_open(p)) log(LOG_WARNING, "Could not initialize offload capabilities\n"); } - device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id); + PORT_LOCK(p); t3_port_intr_enable(sc, p->port_id); + if (!mac->multiport) + t3_mac_reset(mac); + 
cxgb_update_mac_settings(p); + t3_link_start(&p->phy, mac, &p->link_config); + t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + PORT_UNLOCK(p); -#if !defined(LINK_ATTACH) - cxgb_link_start(p); t3_link_changed(sc, p->port_id); -#endif - ifp->if_baudrate = IF_Mbps(p->link_config.speed); - callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc); - t3_sge_reset_adapter(sc); + for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { + struct sge_qset *qs = &sc->sge.qs[i]; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; -} + callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs, + txq->txq_watchdog.c_cpu); + } -static void -cxgb_set_rxmode(struct port_info *p) -{ - struct t3_rx_mode rm; - struct cmac *mac = &p->mac; + /* all ok */ + setbit(&sc->open_device_map, p->port_id); - t3_init_rx_mode(&rm, p); - mtx_lock(&p->adapter->mdio_lock); - t3_mac_set_rx_mode(mac, &rm); - mtx_unlock(&p->adapter->mdio_lock); + return (0); } -static void -cxgb_stop_locked(struct port_info *pi) +/* + * Called on "ifconfig down", and from port_detach + */ +static int +cxgb_uninit_synchronized(struct port_info *pi) { - struct ifnet *ifp; + struct adapter *sc = pi->adapter; + struct ifnet *ifp = pi->ifp; - PORT_LOCK_ASSERT_OWNED(pi); - ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter); - - ifp = pi->ifp; - t3_port_intr_disable(pi->adapter, pi->port_id); + /* + * Clear this port's bit from the open device map, and then drain all + * the tasks that can access/manipulate this port's port_info or ifp. + * We disable this port's interrupts here and so the the slow/ext + * interrupt tasks won't be enqueued. The tick task will continue to + * be enqueued every second but the runs after this drain will not see + * this port in the open device map. 
+ * + * A well behaved task must take open_device_map into account and ignore + * ports that are not open. + */ + clrbit(&sc->open_device_map, pi->port_id); + t3_port_intr_disable(sc, pi->port_id); + taskqueue_drain(sc->tq, &sc->slow_intr_task); + taskqueue_drain(sc->tq, &sc->ext_intr_task); + taskqueue_drain(sc->tq, &sc->tick_task); + + PORT_LOCK(pi); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* disable pause frames */ - t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset, - F_TXPAUSEEN, 0); + t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0); /* Reset RX FIFO HWM */ - t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset, + t3_set_reg_field(sc, A_XGM_RXFIFO_CFG + pi->mac.offset, V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0); - - ADAPTER_LOCK(pi->adapter); - clrbit(&pi->adapter->open_device_map, pi->port_id); - - if (pi->adapter->open_device_map == 0) - cxgb_down_locked(pi->adapter); - - ADAPTER_UNLOCK(pi->adapter); - -#if !defined(LINK_ATTACH) DELAY(100); /* Wait for TXFIFO empty */ - t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset, + t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset, F_TXFIFO_EMPTY, 1, 20, 5); DELAY(100); t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); + pi->phy.ops->power_down(&pi->phy, 1); -#endif -} + PORT_UNLOCK(pi); -static int -cxgb_set_mtu(struct port_info *p, int mtu) -{ - struct ifnet *ifp = p->ifp; - int error = 0; - - if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) - error = EINVAL; - else if (ifp->if_mtu != mtu) { - PORT_LOCK(p); - ifp->if_mtu = mtu; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - cxgb_stop_locked(p); - cxgb_init_locked(p); - } - PORT_UNLOCK(p); - } - return (error); + pi->link_config.link_ok = 0; + t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0); + + if ((sc->open_device_map & PORT_MASK) == 0) + offload_close(&sc->tdev); + + if (sc->open_device_map == 0) + cxgb_down(pi->adapter); + + return (0); } #ifdef LRO_SUPPORTED @@ -2011,64 +2062,57 
@@ static int cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct port_info *p = ifp->if_softc; -#ifdef INET - struct ifaddr *ifa = (struct ifaddr *)data; -#endif struct ifreq *ifr = (struct ifreq *)data; - int flags, error = 0, reinit = 0; + int flags, error = 0, mtu, handle_unsynchronized = 0; uint32_t mask; - /* - * XXX need to check that we aren't in the middle of an unload + if ((error = cxgb_begin_op(p, "cxgbioct")) != 0) + return (error); + + /* + * Only commands that should be handled within begin-op/end-op are + * serviced in this switch statement. See handle_unsynchronized. */ switch (command) { case SIOCSIFMTU: - error = cxgb_set_mtu(p, ifr->ifr_mtu); - break; - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - PORT_LOCK(p); - cxgb_init_locked(p); - PORT_UNLOCK(p); - } - arp_ifinit(ifp, ifa); - } else -#endif - error = ether_ioctl(ifp, command, data); + mtu = ifr->ifr_mtu; + if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { + error = EINVAL; + } else { + ifp->if_mtu = mtu; + PORT_LOCK(p); + cxgb_update_mac_settings(p); + PORT_UNLOCK(p); + } + break; case SIOCSIFFLAGS: - PORT_LOCK(p); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = p->if_flags; if (((ifp->if_flags ^ flags) & IFF_PROMISC) || - ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) - cxgb_set_rxmode(p); + ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) { + PORT_LOCK(p); + cxgb_update_mac_settings(p); + PORT_UNLOCK(p); + } } else - cxgb_init_locked(p); + error = cxgb_init_synchronized(p); p->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) - cxgb_stop_locked(p); - - PORT_UNLOCK(p); + error = cxgb_uninit_synchronized(p); + break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - cxgb_set_rxmode(p); + PORT_LOCK(p); + cxgb_update_mac_settings(p); + PORT_UNLOCK(p); } - break; - case SIOCSIFMEDIA: - 
case SIOCGIFMEDIA: - PORT_LOCK(p); - error = ifmedia_ioctl(ifp, ifr, &p->media, command); - PORT_UNLOCK(p); + break; case SIOCSIFCAP: - PORT_LOCK(p); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { @@ -2091,12 +2135,8 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; - } else { - if (cxgb_debug) - printf("cxgb requires tx checksum offload" - " be enabled to use TSO\n"); + } else error = EINVAL; - } } #ifdef LRO_SUPPORTED if (mask & IFCAP_LRO) { @@ -2108,29 +2148,54 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - reinit = ifp->if_drv_flags & IFF_DRV_RUNNING; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + PORT_LOCK(p); + cxgb_update_mac_settings(p); + PORT_UNLOCK(p); + } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; - reinit = ifp->if_drv_flags & IFF_DRV_RUNNING; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + PORT_LOCK(p); + cxgb_update_mac_settings(p); + PORT_UNLOCK(p); + } } if (mask & IFCAP_VLAN_HWCSUM) { ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; } - if (reinit) { - cxgb_stop_locked(p); - cxgb_init_locked(p); - } - PORT_UNLOCK(p); #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif break; default: - error = ether_ioctl(ifp, command, data); + handle_unsynchronized = 1; break; } + + /* + * We don't want to call anything outside the driver while inside a + * begin-op/end-op block. If it calls us back (eg. ether_ioctl may + * call cxgb_init), we may deadlock if the state is + * already marked busy. + * + * XXX: this probably opens a small race window with kldunload... 
+ */ + cxgb_end_op(p); + + /* The IS_DOOMED check is racy, we're clutching at straws here */ + if (handle_unsynchronized && !IS_DOOMED(p)) { + switch (command) { + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &p->media, command); + default: + error = ether_ioctl(ifp, command, data); + } + } + return (error); } @@ -2189,15 +2254,15 @@ cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) int m; if (cur->ifm_data != p->phy.modtype) { - /* p->media about to be rebuilt, must hold lock */ - PORT_LOCK_ASSERT_OWNED(p); + PORT_LOCK(p); m = cxgb_ifm_type(p->phy.modtype); ifmedia_removeall(&p->media); ifmedia_add(&p->media, m, p->phy.modtype, NULL); ifmedia_set(&p->media, m); cur = p->media.ifm_cur; /* ifmedia_set modified ifm_cur */ ifmr->ifm_current = m; + PORT_UNLOCK(p); } ifmr->ifm_status = IFM_AVALID; @@ -2267,63 +2332,50 @@ check_link_status(adapter_t *sc) { int i; - /* For synchronized access to open_device_map */ - ADAPTER_LOCK_ASSERT_OWNED(sc); - for (i = 0; i < (sc)->params.nports; ++i) { struct port_info *p = &sc->port[i]; - struct link_config *lc = &p->link_config; - - if (!isset(&sc->open_device_map, p->port_id)) { - /* - * port is down, report link down too. Note - * that we do this for IRQ based PHYs too. 
- */ - lc->link_ok = 0; - t3_os_link_changed(sc, i, lc->link_ok, lc->speed, - lc->duplex, lc->fc); - } else if (p->link_fault || !(p->phy.caps & SUPPORTED_IRQ)) + + if (!isset(&sc->open_device_map, p->port_id)) + continue; + + if (p->link_fault || !(p->phy.caps & SUPPORTED_IRQ)) t3_link_changed(sc, i); } } static void -check_t3b2_mac(struct adapter *adapter) +check_t3b2_mac(struct adapter *sc) { int i; - if(adapter->flags & CXGB_SHUTDOWN) + if (sc->flags & CXGB_SHUTDOWN) return; - - for_each_port(adapter, i) { - struct port_info *p = &adapter->port[i]; - struct ifnet *ifp = p->ifp; + + for_each_port(sc, i) { + struct port_info *p = &sc->port[i]; int status; - - if(adapter->flags & CXGB_SHUTDOWN) - return; - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) +#ifdef INVARIANTS + struct ifnet *ifp = p->ifp; +#endif + + if (!isset(&sc->open_device_map, p->port_id)) continue; - - status = 0; + + KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, + ("%s: state mismatch (drv_flags %x, device_map %x)", + __func__, ifp->if_drv_flags, sc->open_device_map)); + PORT_LOCK(p); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) - status = t3b2_mac_watchdog_task(&p->mac); + status = t3b2_mac_watchdog_task(&p->mac); if (status == 1) p->mac.stats.num_toggled++; else if (status == 2) { struct cmac *mac = &p->mac; - int mtu = ifp->if_mtu; - if (ifp->if_capenable & IFCAP_VLAN_MTU) - mtu += ETHER_VLAN_ENCAP_LEN; - t3_mac_set_mtu(mac, mtu); - t3_mac_set_address(mac, 0, p->hw_addr); - cxgb_set_rxmode(p); + cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); - t3_port_intr_enable(adapter, p->port_id); + t3_port_intr_enable(sc, p->port_id); p->mac.stats.num_resets++; } PORT_UNLOCK(p); @@ -2335,7 +2387,7 @@ cxgb_tick(void *arg) { adapter_t *sc = (adapter_t *)arg; - if(sc->flags & CXGB_SHUTDOWN) + if (sc->flags & CXGB_SHUTDOWN) return; taskqueue_enqueue(sc->tq, &sc->tick_task); @@ -2350,21 +2402,12 @@ cxgb_tick_handler(void 
*arg, int count) int i; uint32_t cause, reset; - if(sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE)) + if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE)) return; - ADAPTER_LOCK(sc); - check_link_status(sc); - sc->check_task_cnt++; - /* - * adapter lock can currently only be acquired after the - * port lock - */ - ADAPTER_UNLOCK(sc); - if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) check_t3b2_mac(sc); @@ -2393,6 +2436,10 @@ cxgb_tick_handler(void *arg, int count) struct ifnet *ifp = pi->ifp; struct cmac *mac = &pi->mac; struct mac_stats *mstats = &mac->stats; + + if (!isset(&sc->open_device_map, pi->port_id)) + continue; + PORT_LOCK(pi); t3_mac_update_stats(mac); PORT_UNLOCK(pi); diff --git a/sys/dev/cxgb/cxgb_multiq.c b/sys/dev/cxgb/cxgb_multiq.c deleted file mode 100644 index c3abcfb..0000000 --- a/sys/dev/cxgb/cxgb_multiq.c +++ /dev/null @@ -1,594 +0,0 @@ -/************************************************************************** - -Copyright (c) 2007, Chelsio Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Neither the name of the Chelsio Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -***************************************************************************/ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/bus.h> -#include <sys/module.h> -#include <sys/pciio.h> -#include <sys/conf.h> -#include <machine/bus.h> -#include <machine/resource.h> -#include <sys/bus_dma.h> -#include <sys/kthread.h> -#include <sys/rman.h> -#include <sys/ioccom.h> -#include <sys/mbuf.h> -#include <sys/linker.h> -#include <sys/firmware.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/proc.h> -#include <sys/sched.h> -#include <sys/smp.h> -#include <sys/sysctl.h> -#include <sys/queue.h> -#include <sys/taskqueue.h> -#include <sys/unistd.h> -#include <sys/syslog.h> - -#include <net/bpf.h> -#include <net/ethernet.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <net/if_dl.h> -#include <net/if_media.h> -#include <net/if_types.h> - -#include <netinet/in_systm.h> -#include <netinet/in.h> -#include <netinet/if_ether.h> -#include <netinet/ip.h> -#include <netinet/ip6.h> -#include <netinet/sctp_crc32.h> -#include <netinet/sctp.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> - - -#include <dev/pci/pcireg.h> -#include <dev/pci/pcivar.h> -#include <dev/pci/pci_private.h> - -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <cxgb_include.h> -#include <sys/mvec.h> - -extern int txq_fills; -int multiq_tx_enable = 1; -int coalesce_tx_enable = 1; 
-int wakeup_tx_thread = 0; - -extern struct sysctl_oid_list sysctl__hw_cxgb_children; -static int sleep_ticks = 1; -TUNABLE_INT("hw.cxgb.sleep_ticks", &sleep_ticks); -SYSCTL_UINT(_hw_cxgb, OID_AUTO, sleep_ticks, CTLFLAG_RDTUN, &sleep_ticks, 0, - "ticks to sleep between checking pcpu queues"); - -int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; -TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); -SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, - "size of per-queue mbuf ring"); - - -static void cxgb_pcpu_start_proc(void *arg); -static int cxgb_tx(struct sge_qset *qs, uint32_t txmax); - -#ifdef IFNET_MULTIQUEUE -static int cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie); -#endif - -static inline int -cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m) -{ - struct sge_txq *txq; - int err = 0; - - KASSERT(m != NULL, ("null mbuf")); - KASSERT(m->m_type == MT_DATA, ("bad mbuf type %d", m->m_type)); - if (qs->qs_flags & QS_EXITING) { - m_freem(m); - return (ENETDOWN); - } - txq = &qs->txq[TXQ_ETH]; - err = drbr_enqueue(qs->port->ifp, txq->txq_mr, m); - if (err) { - txq->txq_drops++; - m_freem(m); - } - if (wakeup_tx_thread && !err && - ((txq->flags & TXQ_TRANSMITTING) == 0)) - wakeup(qs); - - return (err); -} - -static int -cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec) -{ - struct mbuf *m, *m0; - struct sge_qset *qs; - int count, size, coalesced; - struct adapter *sc; - -#ifdef ALTQ - if (ALTQ_IS_ENABLED(txq->txq_ifq)) { - IFQ_DRV_DEQUEUE(txq->txq_ifq, m); - if (m == NULL) - return (0); - - m_vec[0] = m; - return (1); - } -#endif - mtx_assert(&txq->lock, MA_OWNED); - coalesced = count = size = 0; - qs = txq_to_qset(txq, TXQ_ETH); - if (qs->qs_flags & QS_EXITING) - return (0); - - if (txq->immpkt != NULL) { - m_vec[0] = txq->immpkt; - txq->immpkt = NULL; - return (1); - } - sc = qs->port->adapter; - - m = buf_ring_dequeue_sc(txq->txq_mr); - if (m == NULL) - return (0); - - count = 1; - - 
m_vec[0] = m; - if (m->m_pkthdr.tso_segsz > 0 || - m->m_pkthdr.len > TX_WR_SIZE_MAX || - m->m_next != NULL || - (coalesce_tx_enable == 0)) { - return (count); - } - - size = m->m_pkthdr.len; - for (m = buf_ring_peek(txq->txq_mr); m != NULL; - m = buf_ring_peek(txq->txq_mr)) { - - if (m->m_pkthdr.tso_segsz > 0 - || size + m->m_pkthdr.len > TX_WR_SIZE_MAX - || m->m_next != NULL) - break; - - m0 = buf_ring_dequeue_sc(txq->txq_mr); -#ifdef DEBUG_BUFRING - if (m0 != m) - panic("peek and dequeue don't match"); -#endif - size += m->m_pkthdr.len; - m_vec[count++] = m; - - if (count == TX_WR_COUNT_MAX) - break; - - coalesced++; - } - txq->txq_coalesced += coalesced; - - return (count); -} - -static void -cxgb_pcpu_free(struct sge_qset *qs) -{ - struct mbuf *m; - struct sge_txq *txq = &qs->txq[TXQ_ETH]; - - mtx_lock(&txq->lock); - while ((m = mbufq_dequeue(&txq->sendq)) != NULL) - m_freem(m); - while ((m = buf_ring_dequeue_sc(txq->txq_mr)) != NULL) - m_freem(m); - - t3_free_tx_desc_all(txq); - mtx_unlock(&txq->lock); -} - -static int -cxgb_pcpu_reclaim_tx(struct sge_txq *txq) -{ - int reclaimable; - struct sge_qset *qs = txq_to_qset(txq, TXQ_ETH); - - mtx_assert(&txq->lock, MA_OWNED); - - reclaimable = desc_reclaimable(txq); - if (reclaimable == 0) - return (0); - - t3_free_tx_desc(txq, reclaimable); - - txq->cleaned += reclaimable; - txq->in_use -= reclaimable; - if (isset(&qs->txq_stopped, TXQ_ETH)) { - qs->port->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - clrbit(&qs->txq_stopped, TXQ_ETH); - } - - return (reclaimable); -} - -static int -cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush) -{ - int i, err, initerr, flush, reclaimed, stopped; - struct port_info *pi; - struct sge_txq *txq; - adapter_t *sc; - uint32_t max_desc; - - pi = qs->port; - initerr = err = i = reclaimed = 0; - sc = pi->adapter; - txq = &qs->txq[TXQ_ETH]; - - mtx_assert(&txq->lock, MA_OWNED); - - retry: - if (!pi->link_config.link_ok) - initerr = ENETDOWN; - else if (qs->qs_flags & 
QS_EXITING) - initerr = ENETDOWN; - else if ((pi->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - initerr = ENETDOWN; - else if ((pi->ifp->if_flags & IFF_UP) == 0) - initerr = ENETDOWN; - else if (immpkt) { - - if (!buf_ring_empty(txq->txq_mr) - || ALTQ_IS_ENABLED(&pi->ifp->if_snd)) - initerr = cxgb_pcpu_enqueue_packet_(qs, immpkt); - else - txq->immpkt = immpkt; - - immpkt = NULL; - } - if (initerr) { - if (immpkt) - m_freem(immpkt); - if (initerr == ENOBUFS && !tx_flush) - wakeup(qs); - return (initerr); - } - - if ((tx_flush && (desc_reclaimable(txq) > 0)) || - (desc_reclaimable(txq) > (TX_ETH_Q_SIZE>>3))) { - cxgb_pcpu_reclaim_tx(txq); - } - - stopped = isset(&qs->txq_stopped, TXQ_ETH); - flush = ((!drbr_empty(pi->ifp, txq->txq_mr) - && !stopped) || txq->immpkt); - max_desc = tx_flush ? TX_ETH_Q_SIZE : TX_START_MAX_DESC; - - err = flush ? cxgb_tx(qs, max_desc) : 0; - - if ((tx_flush && flush && err == 0) && - !drbr_empty(pi->ifp, txq->txq_mr)) { - struct thread *td = curthread; - - if (++i > 1) { - thread_lock(td); - sched_prio(td, PRI_MIN_TIMESHARE); - thread_unlock(td); - } - if (i > 200) { - device_printf(qs->port->adapter->dev, - "exceeded max enqueue tries\n"); - return (EBUSY); - } - goto retry; - } - return (err); -} - -int -cxgb_pcpu_transmit(struct ifnet *ifp, struct mbuf *immpkt) -{ - uint32_t cookie; - int err, qidx, locked, resid; - struct port_info *pi; - struct sge_qset *qs; - struct sge_txq *txq = NULL /* gcc is dumb */; - struct adapter *sc; - - pi = ifp->if_softc; - sc = pi->adapter; - qs = NULL; - qidx = resid = err = cookie = locked = 0; - -#ifdef IFNET_MULTIQUEUE - if (immpkt && (immpkt->m_pkthdr.flowid != 0)) { - cookie = immpkt->m_pkthdr.flowid; - qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie); - qs = &pi->adapter->sge.qs[qidx]; - } else -#endif - qs = &pi->adapter->sge.qs[pi->first_qset]; - - txq = &qs->txq[TXQ_ETH]; - if (((sc->tunq_coalesce == 0) || - (buf_ring_count(txq->txq_mr) >= TX_WR_COUNT_MAX) || - (coalesce_tx_enable == 0)) && 
mtx_trylock(&txq->lock)) { - txq->flags |= TXQ_TRANSMITTING; - err = cxgb_pcpu_start_(qs, immpkt, FALSE); - txq->flags &= ~TXQ_TRANSMITTING; - mtx_unlock(&txq->lock); - } else if (immpkt) - return (cxgb_pcpu_enqueue_packet_(qs, immpkt)); - return ((err == EBUSY) ? 0 : err); -} - -void -cxgb_start(struct ifnet *ifp) -{ - struct port_info *p = ifp->if_softc; - - if (!p->link_config.link_ok) - return; - - cxgb_pcpu_transmit(ifp, NULL); -} - -static void -cxgb_pcpu_start_proc(void *arg) -{ - struct sge_qset *qs = arg; - struct thread *td; - struct sge_txq *txq = &qs->txq[TXQ_ETH]; - int idleticks, err = 0; - - td = curthread; - sleep_ticks = max(hz/1000, 1); - qs->qs_flags |= QS_RUNNING; - thread_lock(td); - sched_bind(td, qs->qs_cpuid); - thread_unlock(td); - - DELAY(qs->qs_cpuid*100000); - if (bootverbose) - printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu); - - for (;;) { - if (qs->qs_flags & QS_EXITING) - break; - - if ((qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - idleticks = hz; - if (!buf_ring_empty(txq->txq_mr) || - !mbufq_empty(&txq->sendq)) - cxgb_pcpu_free(qs); - goto done; - } else - idleticks = sleep_ticks; - if (mtx_trylock(&txq->lock)) { - txq->flags |= TXQ_TRANSMITTING; - err = cxgb_pcpu_start_(qs, NULL, TRUE); - txq->flags &= ~TXQ_TRANSMITTING; - mtx_unlock(&txq->lock); - } else - err = EINPROGRESS; -#ifdef notyet - if (mtx_trylock(&qs->rspq.lock)) { - process_responses(sc, qs, -1); - - refill_fl_service(sc, &qs->fl[0]); - refill_fl_service(sc, &qs->fl[1]); - t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | - V_NEWTIMER(qs->rspq.next_holdoff) | V_NEWINDEX(qs->rspq.cidx)); - - mtx_unlock(&qs->rspq.lock); - } -#endif - if ((!buf_ring_empty(txq->txq_mr)) && err == 0) { -#if 0 - if (cxgb_debug) - printf("head=%p cons=%d prod=%d\n", - txq->sendq.head, txq->txq_mr.br_cons, - txq->txq_mr.br_prod); -#endif - continue; - } - done: - tsleep(qs, 1, "cxgbidle", idleticks); - } - - if (bootverbose) - device_printf(qs->port->adapter->dev, 
"exiting thread for cpu%d\n", qs->qs_cpuid); - - - cxgb_pcpu_free(qs); - t3_free_qset(qs->port->adapter, qs); - - qs->qs_flags &= ~QS_RUNNING; -#if __FreeBSD_version >= 800002 - kproc_exit(0); -#else - kthread_exit(0); -#endif -} - -#ifdef IFNET_MULTIQUEUE -static int -cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie) -{ - int qidx; - uint32_t tmp; - - if (multiq_tx_enable == 0) - return (pi->first_qset); - - /* - * Will probably need to be changed for 4-port XXX - */ - tmp = pi->tx_chan ? cookie : cookie & ((RSS_TABLE_SIZE>>1)-1); - DPRINTF(" tmp=%d ", tmp); - qidx = (tmp & (pi->nqsets -1)) + pi->first_qset; - - return (qidx); -} -#endif - -void -cxgb_pcpu_startup_threads(struct adapter *sc) -{ - int i, j, nqsets; - struct proc *p; - - - for (i = 0; i < (sc)->params.nports; ++i) { - struct port_info *pi = adap2pinfo(sc, i); - -#ifdef IFNET_MULTIQUEUE - nqsets = pi->nqsets; -#else - nqsets = 1; -#endif - for (j = 0; j < nqsets; ++j) { - struct sge_qset *qs; - - qs = &sc->sge.qs[pi->first_qset + j]; - qs->port = pi; - qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus); - device_printf(sc->dev, "starting thread for %d\n", - qs->qs_cpuid); - -#if __FreeBSD_version >= 800002 - kproc_create(cxgb_pcpu_start_proc, qs, &p, - RFNOWAIT, 0, "cxgbsp"); -#else - kthread_create(cxgb_pcpu_start_proc, qs, &p, - RFNOWAIT, 0, "cxgbsp"); -#endif - DELAY(200); - } - } -} - -void -cxgb_pcpu_shutdown_threads(struct adapter *sc) -{ - int i, j; - int nqsets; - - for (i = 0; i < sc->params.nports; i++) { - struct port_info *pi = &sc->port[i]; - int first = pi->first_qset; - -#ifdef IFNET_MULTIQUEUE - nqsets = pi->nqsets; -#else - nqsets = 1; -#endif - for (j = 0; j < nqsets; j++) { - struct sge_qset *qs = &sc->sge.qs[first + j]; - - qs->qs_flags |= QS_EXITING; - wakeup(qs); - tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 0", hz>>2); - while (qs->qs_flags & QS_RUNNING) { - qs->qs_flags |= QS_EXITING; - device_printf(sc->dev, "qset thread %d still running - sleeping\n", first + j); 
- tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 1", 2*hz); - } - } - } -} - -static __inline void -check_pkt_coalesce(struct sge_qset *qs) -{ - struct adapter *sc; - struct sge_txq *txq; - - txq = &qs->txq[TXQ_ETH]; - sc = qs->port->adapter; - - if (sc->tunq_fill[qs->idx] && (txq->in_use < (txq->size - (txq->size>>2)))) - sc->tunq_fill[qs->idx] = 0; - else if (!sc->tunq_fill[qs->idx] && (txq->in_use > (txq->size - (txq->size>>2)))) - sc->tunq_fill[qs->idx] = 1; -} - -static int -cxgb_tx(struct sge_qset *qs, uint32_t txmax) -{ - struct sge_txq *txq; - struct ifnet *ifp = qs->port->ifp; - int i, err, in_use_init, count; - struct mbuf *m_vec[TX_WR_COUNT_MAX]; - - txq = &qs->txq[TXQ_ETH]; - ifp = qs->port->ifp; - in_use_init = txq->in_use; - count = err = 0; - - mtx_assert(&txq->lock, MA_OWNED); - while ((txq->in_use - in_use_init < txmax) && - (txq->size > txq->in_use + TX_MAX_DESC)) { - check_pkt_coalesce(qs); - count = cxgb_dequeue_packet(txq, m_vec); - if (count == 0) - break; - for (i = 0; i < count; i++) - ETHER_BPF_MTAP(ifp, m_vec[i]); - - if ((err = t3_encap(qs, m_vec, count)) != 0) - break; - txq->txq_enqueued += count; - } - if (txq->size <= txq->in_use + TX_MAX_DESC) { - txq_fills++; - setbit(&qs->txq_stopped, TXQ_ETH); - } - if (err == ENOMEM) { - int i; - /* - * Sub-optimal :-/ - */ - printf("ENOMEM!!!"); - for (i = 0; i < count; i++) - m_freem(m_vec[i]); - } - return (err); -} - diff --git a/sys/dev/cxgb/cxgb_osdep.h b/sys/dev/cxgb/cxgb_osdep.h index f03cb0d..6108214 100644 --- a/sys/dev/cxgb/cxgb_osdep.h +++ b/sys/dev/cxgb/cxgb_osdep.h @@ -41,8 +41,8 @@ $FreeBSD$ #include <dev/mii/mii.h> +#define CONFIG_CHELSIO_T3_CORE #include <common/cxgb_version.h> -#include <cxgb_config.h> #ifndef _CXGB_OSDEP_H_ #define _CXGB_OSDEP_H_ @@ -113,9 +113,9 @@ struct t3_mbuf_hdr { #include "opt_inet.h" #ifdef INET #define LRO_SUPPORTED -#endif #define TOE_SUPPORTED #endif +#endif #if __FreeBSD_version < 800054 #if defined (__GNUC__) @@ -165,8 +165,7 @@ struct t3_mbuf_hdr { 
#define TX_MAX_DESC 4 /* max descriptors per packet */ -#define TX_START_MIN_DESC (TX_MAX_DESC << 2) -#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors +#define TX_START_MAX_DESC (TX_MAX_DESC << 2) /* maximum number of descriptors * call to start used per */ #define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors @@ -177,18 +176,17 @@ struct t3_mbuf_hdr { #define TX_WR_COUNT_MAX 7 /* the maximum total number of packets that can be * aggregated into a single TX WR */ +#if defined(__i386__) || defined(__amd64__) - -#if defined(__i386__) || defined(__amd64__) -#define smp_mb() mb() - -#define L1_CACHE_BYTES 128 static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -} +} +#define smp_mb() mb() + +#define L1_CACHE_BYTES 128 extern void kdb_backtrace(void); #define WARN_ON(condition) do { \ @@ -198,8 +196,7 @@ extern void kdb_backtrace(void); } \ } while (0) - -#else /* !i386 && !amd64 */ +#else #define smp_mb() #define prefetch(x) #define L1_CACHE_BYTES 32 diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index 4583483..3ee721c 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/syslog.h> +#include <net/bpf.h> + #include <netinet/in_systm.h> #include <netinet/in.h> #include <netinet/ip.h> @@ -65,28 +67,61 @@ __FBSDID("$FreeBSD$"); #include <cxgb_include.h> #include <sys/mvec.h> -int txq_fills = 0; +int txq_fills = 0; +int multiq_tx_enable = 1; + +extern struct sysctl_oid_list sysctl__hw_cxgb_children; +int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; +TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, + "size of per-queue mbuf ring"); + +static int cxgb_tx_coalesce_force = 0; +TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, 
tx_coalesce_force, CTLFLAG_RW, + &cxgb_tx_coalesce_force, 0, + "coalesce small packets into a single work request regardless of ring state"); + +#define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 +#define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) +#define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 +#define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 +#define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 +#define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 +#define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 + + +static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; +TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", + &cxgb_tx_coalesce_enable_start); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, + &cxgb_tx_coalesce_enable_start, 0, + "coalesce enable threshold"); +static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; +TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, + &cxgb_tx_coalesce_enable_stop, 0, + "coalesce disable threshold"); +static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; +TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, + &cxgb_tx_reclaim_threshold, 0, + "tx cleaning minimum threshold"); + /* * XXX don't re-enable this until TOE stops assuming * we have an m_ext */ static int recycle_enable = 0; -extern int cxgb_txq_buf_ring_size; -int cxgb_cached_allocations; -int cxgb_cached; int cxgb_ext_freed = 0; int cxgb_ext_inited = 0; int fl_q_size = 0; int jumbo_q_size = 0; extern int cxgb_use_16k_clusters; -extern int cxgb_pcpu_cache_enable; extern int nmbjumbo4; extern int nmbjumbo9; extern int nmbjumbo16; -extern int multiq_tx_enable; -extern int coalesce_tx_enable; -extern int wakeup_tx_thread; #define USE_GTS 0 @@ -138,22 +173,22 @@ struct rsp_desc { /* response queue descriptor */ #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct 
tx_sw_desc { /* SW state per Tx descriptor */ - struct mbuf_iovec mi; + struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ - caddr_t rxsd_cl; - caddr_t data; - bus_dmamap_t map; - int flags; + caddr_t rxsd_cl; + struct mbuf *m; + bus_dmamap_t map; + int flags; }; struct txq_state { - unsigned int compl; - unsigned int gen; - unsigned int pidx; + unsigned int compl; + unsigned int gen; + unsigned int pidx; }; struct refill_fl_cb_arg { @@ -162,6 +197,7 @@ struct refill_fl_cb_arg { int nseg; }; + /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is @@ -187,13 +223,133 @@ static uint8_t flit_desc_map[] = { #endif }; +#define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) +#define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) +#define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) +#define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) +#define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) +#define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) +#define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ + drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) +#define TXQ_RING_DEQUEUE(qs) \ + drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) int cxgb_debug = 0; static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); +static void cxgb_start_locked(struct sge_qset *qs); + +/* + * XXX need to cope with bursty scheduling by looking at a wider + * window than we are now for determining the need for coalescing + * + */ +static __inline uint64_t +check_pkt_coalesce(struct sge_qset *qs) +{ + struct adapter *sc; + struct sge_txq *txq; + uint8_t *fill; + + if (__predict_false(cxgb_tx_coalesce_force)) + return (1); + txq = &qs->txq[TXQ_ETH]; + sc = qs->port->adapter; + fill = &sc->tunq_fill[qs->idx]; + + if (cxgb_tx_coalesce_enable_start > 
COALESCE_START_MAX) + cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; + if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) + cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; + /* + * if the hardware transmit queue is more than 1/8 full + * we mark it as coalescing - we drop back from coalescing + * when we go below 1/32 full and there are no packets enqueued, + * this provides us with some degree of hysteresis + */ + if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && + TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) + *fill = 0; + else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) + *fill = 1; + + return (sc->tunq_coalesce); +} + +#ifdef __LP64__ +static void +set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) +{ + uint64_t wr_hilo; +#if _BYTE_ORDER == _LITTLE_ENDIAN + wr_hilo = wr_hi; + wr_hilo |= (((uint64_t)wr_lo)<<32); +#else + wr_hilo = wr_lo; + wr_hilo |= (((uint64_t)wr_hi)<<32); +#endif + wrp->wrh_hilo = wr_hilo; +} +#else +static void +set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) +{ + + wrp->wrh_hi = wr_hi; + wmb(); + wrp->wrh_lo = wr_lo; +} +#endif +struct coalesce_info { + int count; + int nbytes; +}; + +static int +coalesce_check(struct mbuf *m, void *arg) +{ + struct coalesce_info *ci = arg; + int *count = &ci->count; + int *nbytes = &ci->nbytes; + + if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && + (*count < 7) && (m->m_next == NULL))) { + *count += 1; + *nbytes += m->m_len; + return (1); + } + return (0); +} + +static struct mbuf * +cxgb_dequeue(struct sge_qset *qs) +{ + struct mbuf *m, *m_head, *m_tail; + struct coalesce_info ci; + + + if (check_pkt_coalesce(qs) == 0) + return TXQ_RING_DEQUEUE(qs); + + m_head = m_tail = NULL; + ci.count = ci.nbytes = 0; + do { + m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); + if (m_head == NULL) { + m_tail = m_head = m; + } else if (m != NULL) { + m_tail->m_nextpkt = m; + m_tail = m; + } + } while (m != NULL); + 
if (ci.count > 7) + panic("trying to coalesce %d packets in to one WR", ci.count); + return (m_head); +} + /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter @@ -204,19 +360,27 @@ static void sge_txq_reclaim_handler(void *arg, int ncount); * queue's lock held. */ static __inline int -reclaim_completed_tx_(struct sge_txq *q, int reclaim_min) +reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) { + struct sge_txq *q = &qs->txq[queue]; int reclaim = desc_reclaimable(q); + if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || + (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) + cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; + if (reclaim < reclaim_min) return (0); - - mtx_assert(&q->lock, MA_OWNED); + + mtx_assert(&qs->lock, MA_OWNED); if (reclaim > 0) { - t3_free_tx_desc(q, reclaim); + t3_free_tx_desc(qs, reclaim, queue); q->cleaned += reclaim; q->in_use -= reclaim; - } + } + if (isset(&qs->txq_stopped, TXQ_ETH)) + clrbit(&qs->txq_stopped, TXQ_ETH); + return (reclaim); } @@ -513,20 +677,27 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n) struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; + struct mbuf *m; caddr_t cl; int err, count = 0; - int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); cb_arg.error = 0; while (n--) { /* * We only allocate a cluster, mbuf allocation happens after rx */ - if ((cl = cxgb_cache_get(q->zone)) == NULL) { - log(LOG_WARNING, "Failed to allocate cluster\n"); - goto done; + if (q->zone == zone_pack) { + if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) + break; + cl = m->m_ext.ext_buf; + } else { + if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) + break; + if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { + uma_zfree(q->zone, cl); + break; + } } - if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) 
{ log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); @@ -537,22 +708,19 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n) } #if !defined(__i386__) && !defined(__amd64__) err = bus_dmamap_load(q->entry_tag, sd->map, - cl + header_size, q->buf_size, - refill_fl_cb, &cb_arg, 0); + cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { - log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); - /* - * XXX free cluster - */ - return; + if (q->zone = zone_pack) + uma_zfree(q->zone, cl); + m_free(m); } #else - cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size)); + cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; - sd->data = cl + header_size; + sd->m = m; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); @@ -596,9 +764,20 @@ free_rx_bufs(adapter_t *sc, struct sge_fl *q) if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); - uma_zfree(q->zone, d->rxsd_cl); + if (q->zone == zone_pack) { + m_init(d->m, zone_pack, MCLBYTES, + M_NOWAIT, MT_DATA, M_EXT); + uma_zfree(zone_pack, d->m); + } else { + m_init(d->m, zone_mbuf, MLEN, + M_NOWAIT, MT_DATA, 0); + uma_zfree(zone_mbuf, d->m); + uma_zfree(q->zone, d->rxsd_cl); + } } + d->rxsd_cl = NULL; + d->m = NULL; if (++cidx == q->size) cidx = 0; } @@ -617,12 +796,6 @@ __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) refill_fl(adap, fl, min(max, fl->size - fl->credits)); } -void -refill_fl_service(adapter_t *adap, struct sge_fl *fl) -{ - __refill_fl_lt(adap, fl, 512); -} - /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter @@ -641,7 +814,7 @@ recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = 
from->addr_hi; // likewise - wmb(); + wmb(); /* necessary ? */ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; @@ -750,28 +923,33 @@ static void sge_timer_cb(void *arg) { adapter_t *sc = arg; -#ifndef IFNET_MULTIQUEUE - struct port_info *pi; - struct sge_qset *qs; - struct sge_txq *txq; - int i, j; - int reclaim_ofl, refill_rx; + if ((sc->flags & USING_MSIX) == 0) { + + struct port_info *pi; + struct sge_qset *qs; + struct sge_txq *txq; + int i, j; + int reclaim_ofl, refill_rx; - for (i = 0; i < sc->params.nports; i++) { - pi = &sc->port[i]; - for (j = 0; j < pi->nqsets; j++) { - qs = &sc->sge.qs[pi->first_qset + j]; - txq = &qs->txq[0]; - reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; - refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || - (qs->fl[1].credits < qs->fl[1].size)); - if (reclaim_ofl || refill_rx) { - taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); - break; + if (sc->open_device_map == 0) + return; + + for (i = 0; i < sc->params.nports; i++) { + pi = &sc->port[i]; + for (j = 0; j < pi->nqsets; j++) { + qs = &sc->sge.qs[pi->first_qset + j]; + txq = &qs->txq[0]; + reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; + refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || + (qs->fl[1].credits < qs->fl[1].size)); + if (reclaim_ofl || refill_rx) { + taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); + break; + } } } } -#endif + if (sc->params.nports > 2) { int i; @@ -783,7 +961,8 @@ sge_timer_cb(void *arg) (FW_TUNNEL_SGEEC_START + pi->first_qset)); } } - if (sc->open_device_map != 0) + if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && + sc->open_device_map != 0) callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } @@ -798,8 +977,6 @@ t3_sge_init_adapter(adapter_t *sc) callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, 
sc); - mi_init(); - cxgb_cache_init(); return (0); } @@ -817,13 +994,6 @@ t3_sge_init_port(struct port_info *pi) return (0); } -void -t3_sge_deinit_sw(adapter_t *sc) -{ - - mi_deinit(); -} - /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter @@ -842,29 +1012,16 @@ refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } -static __inline void -sge_txq_reclaim_(struct sge_txq *txq, int force) -{ - - if (desc_reclaimable(txq) < 16) - return; - if (mtx_trylock(&txq->lock) == 0) - return; - reclaim_completed_tx_(txq, 16); - mtx_unlock(&txq->lock); - -} - static void sge_txq_reclaim_handler(void *arg, int ncount) { - struct sge_txq *q = arg; + struct sge_qset *qs = arg; + int i; - sge_txq_reclaim_(q, TRUE); + for (i = 0; i < 3; i++) + reclaim_completed_tx(qs, 16, i); } - - static void sge_timer_reclaim(void *arg, int ncount) { @@ -872,18 +1029,15 @@ sge_timer_reclaim(void *arg, int ncount) int i, nqsets = pi->nqsets; adapter_t *sc = pi->adapter; struct sge_qset *qs; - struct sge_txq *txq; struct mtx *lock; + + KASSERT((sc->flags & USING_MSIX) == 0, + ("can't call timer reclaim for msi-x")); -#ifdef IFNET_MULTIQUEUE - panic("%s should not be called with multiqueue support\n", __FUNCTION__); -#endif for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[pi->first_qset + i]; - txq = &qs->txq[TXQ_OFLD]; - sge_txq_reclaim_(txq, FALSE); - + reclaim_completed_tx(qs, 16, TXQ_OFLD); lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : &sc->sge.qs[0].rspq.lock; @@ -980,7 +1134,7 @@ calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; - if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) + if (m->m_pkthdr.len <= PIO_LEN) return 1; flits = sgl_len(nsegs) + 2; @@ -997,13 +1151,13 @@ busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, { struct mbuf *m0; int err, pktlen, pass = 0; - + retry: err = 0; m0 = *m; pktlen = m0->m_pkthdr.len; #if defined(__i386__) || defined(__amd64__) - if (busdma_map_sg_collapse(m, segs, nsegs) == 0) { + if (busdma_map_sg_collapse(txq, txsd, m, segs, nsegs) == 0) { goto done; } else #endif @@ -1081,7 +1235,7 @@ make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) * @adap: the adapter * @q: the Tx queue * - * Ring the doorbel if a Tx queue is asleep. There is a natural race, + * Ring the doorbell if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. @@ -1144,11 +1298,10 @@ write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { - wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | - V_WR_SGLSFLT(flits)) | wr_hi; - wmb(); - wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | - V_WR_GEN(txqs->gen)) | wr_lo; + set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | + V_WR_SGLSFLT(flits)) | wr_hi, + htonl(V_WR_LEN(flits + sgl_flits) | + V_WR_GEN(txqs->gen)) | wr_lo); /* XXX gen? 
*/ wr_gen2(txd, txqs->gen); @@ -1157,7 +1310,7 @@ write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; - wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | + wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { @@ -1180,26 +1333,24 @@ write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs txd = txq->desc; txsd = txq->sdesc; } - + /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ - KASSERT(txsd->mi.mi_base == NULL, - ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); wrp = (struct work_request_hdr *)txd; - wrp->wr_hi = htonl(V_WR_DATATYPE(1) | + wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; - wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, + wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } - wrp->wr_hi |= htonl(F_WR_EOP); + wrp->wrh_hi |= htonl(F_WR_EOP); wmb(); - wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; + wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } @@ -1214,18 +1365,12 @@ do { \ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ } while (0) -#define GET_VTAG_MI(cntrl, mi) \ -do { \ - if ((mi)->mi_flags & M_VLANTAG) \ - cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \ -} while (0) #else #define GET_VTAG(cntrl, m) -#define GET_VTAG_MI(cntrl, m) #endif -int -t3_encap(struct sge_qset *qs, struct mbuf **m, int count) +static int +t3_encap(struct sge_qset *qs, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; @@ -1242,89 +1387,89 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; - struct mbuf_vec *mv; - struct mbuf_iovec *mi; - DPRINTF("t3_encap cpu=%d ", curcpu); - - mi = 
NULL; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; txd = &txq->desc[txq->pidx]; txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; + + prefetch(txd); m0 = *m; DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); - if (cxgb_debug) - printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx); - mtx_assert(&txq->lock, MA_OWNED); + mtx_assert(&qs->lock, MA_OWNED); cntrl = V_TXPKT_INTF(pi->txpkt_intf); -/* - * XXX need to add VLAN support for 6.x - */ + KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); + #ifdef VLAN_SUPPORTED - if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) + if (m0->m_nextpkt == NULL && m0->m_next != NULL && + m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); #endif - KASSERT(txsd->mi.mi_base == NULL, - ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); - if (count > 1) { - if ((err = busdma_map_sg_vec(m, &m0, segs, count))) + if (m0->m_nextpkt != NULL) { + busdma_map_sg_vec(txq, txsd, m0, segs, &nsegs); + ndesc = 1; + mlen = 0; + } else { + if ((err = busdma_map_sg_collapse(txq, txsd, &m0, segs, &nsegs))) { + if (cxgb_debug) + printf("failed ... err=%d\n", err); return (err); - nsegs = count; - } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { - if (cxgb_debug) - printf("failed ... 
err=%d\n", err); - return (err); - } - KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); - - if ((m0->m_pkthdr.len > PIO_LEN) || (count > 1)) { - mi_collapse_mbuf(&txsd->mi, m0); - mi = &txsd->mi; + } + mlen = m0->m_pkthdr.len; + ndesc = calc_tx_descs(m0, nsegs); } - if (count > 1) { + txq_prod(txq, ndesc, &txqs); + + KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); + txsd->m = m0; + + if (m0->m_nextpkt != NULL) { struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; int i, fidx; - struct mbuf_iovec *batchmi; - mv = mtomv(m0); - batchmi = mv->mv_vec; - + if (nsegs > 7) + panic("trying to coalesce %d packets in to one WR", nsegs); + txq->txq_coalesced += nsegs; wrp = (struct work_request_hdr *)txd; + flits = nsegs*2 + 1; - flits = count*2 + 1; - txq_prod(txq, 1, &txqs); - - for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { - struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; + for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { + struct cpl_tx_pkt_batch_entry *cbe; + uint64_t flit; + uint32_t *hflit = (uint32_t *)&flit; + int cflags = m0->m_pkthdr.csum_flags; cntrl = V_TXPKT_INTF(pi->txpkt_intf); - GET_VTAG_MI(cntrl, batchmi); + GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); - if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) + if (__predict_false(!(cflags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; - if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) + if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) cntrl |= F_TXPKT_L4CSUM_DIS; - cbe->cntrl = htonl(cntrl); - cbe->len = htonl(batchmi->mi_len | 0x80000000); + + hflit[0] = htonl(cntrl); + hflit[1] = htonl(segs[i].ds_len | 0x80000000); + flit |= htobe64(1 << 24); + cbe = &cpl_batch->pkt_entry[i]; + cbe->cntrl = hflit[0]; + cbe->len = hflit[1]; cbe->addr = htobe64(segs[i].ds_addr); - txd->flit[fidx] |= htobe64(1 << 24); } - wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | - 
V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); - wmb(); - wrp->wr_lo = htonl(V_WR_LEN(flits) | + wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | + V_WR_SGLSFLT(flits)) | + htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); - /* XXX gen? */ + set_wr_hdr(wrp, wr_hi, wr_lo); + wmb(); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); - return (0); } else if (tso_info) { int min_size = TCPPKTHDRSIZE, eth_type, tagged; @@ -1337,7 +1482,6 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); - mlen = m0->m_pkthdr.len; hdr->len = htonl(mlen | 0x80000000); DPRINTF("tso buf len=%d\n", mlen); @@ -1386,18 +1530,16 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) */ DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); - txq_prod(txq, 1, &txqs); + txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); - m_freem(m0); - m0 = NULL; flits = (mlen + 7) / 8 + 3; - hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | + wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); - wmb(); - hdr->wr.wr_lo = htonl(V_WR_LEN(flits) | + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); - + set_wr_hdr(&hdr->wr, wr_hi, wr_lo); + wmb(); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); return (0); @@ -1405,7 +1547,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) flits = 3; } else { struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; - + GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) @@ -1413,66 +1555,243 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) if 
(__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) cntrl |= F_TXPKT_L4CSUM_DIS; cpl->cntrl = htonl(cntrl); - mlen = m0->m_pkthdr.len; cpl->len = htonl(mlen | 0x80000000); if (mlen <= PIO_LEN) { - txq_prod(txq, 1, &txqs); + txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); - m_freem(m0); - m0 = NULL; flits = (mlen + 7) / 8 + 2; - cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | - V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | + + wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | + V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); - wmb(); - cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | + wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); - + set_wr_hdr(&cpl->wr, wr_hi, wr_lo); + wmb(); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); - DPRINTF("pio buf\n"); return (0); } - DPRINTF("regular buf\n"); flits = 2; } wrp = (struct work_request_hdr *)txd; - -#ifdef nomore - /* - * XXX need to move into one of the helper routines above - * - */ - if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) - return (err); - m0 = *m; -#endif - ndesc = calc_tx_descs(m0, nsegs); - sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); - DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); - txq_prod(txq, ndesc, &txqs); + KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); - write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); + write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, + sgl_flits, wr_hi, wr_lo); check_ring_tx_db(pi->adapter, txq); - if ((m0->m_type == MT_DATA) && - ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && - (m0->m_ext.ext_type != EXT_PACKET)) { - m0->m_flags &= ~M_EXT ; - cxgb_mbufs_outstanding--; - m_free(m0); + return (0); +} + +void +cxgb_tx_watchdog(void *arg) +{ + struct sge_qset *qs = arg; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + + if (qs->coalescing != 0 && + (txq->in_use <= cxgb_tx_coalesce_enable_stop) && + TXQ_RING_EMPTY(qs)) + qs->coalescing = 0; + else if (qs->coalescing == 0 && + (txq->in_use >= cxgb_tx_coalesce_enable_start)) + qs->coalescing = 1; + if (TXQ_TRYLOCK(qs)) { + qs->qs_flags |= QS_FLUSHING; + cxgb_start_locked(qs); + qs->qs_flags &= ~QS_FLUSHING; + TXQ_UNLOCK(qs); } - + if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) + callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, + qs, txq->txq_watchdog.c_cpu); +} + +static void +cxgb_tx_timeout(void *arg) +{ + struct sge_qset *qs = arg; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + + if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) + qs->coalescing = 1; + if (TXQ_TRYLOCK(qs)) { + qs->qs_flags |= QS_TIMEOUT; + cxgb_start_locked(qs); + qs->qs_flags &= ~QS_TIMEOUT; + TXQ_UNLOCK(qs); + } +} + +static void +cxgb_start_locked(struct sge_qset *qs) +{ + struct mbuf *m_head = NULL; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + int avail, txmax; + int in_use_init = txq->in_use; + struct port_info *pi = qs->port; + struct ifnet *ifp = pi->ifp; + avail = txq->size - 
txq->in_use - 4; + txmax = min(TX_START_MAX_DESC, avail); + + if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) + reclaim_completed_tx(qs, 0, TXQ_ETH); + + if (!pi->link_config.link_ok) { + TXQ_RING_FLUSH(qs); + return; + } + TXQ_LOCK_ASSERT(qs); + while ((txq->in_use - in_use_init < txmax) && + !TXQ_RING_EMPTY(qs) && + (ifp->if_drv_flags & IFF_DRV_RUNNING) && + pi->link_config.link_ok) { + reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); + + if ((m_head = cxgb_dequeue(qs)) == NULL) + break; + /* + * Encapsulation can modify our pointer, and or make it + * NULL on failure. In that event, we can't requeue. + */ + if (t3_encap(qs, &m_head) || m_head == NULL) + break; + + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, m_head); + + /* + * We sent via PIO, no longer need a copy + */ + if (m_head->m_nextpkt == NULL && + m_head->m_pkthdr.len <= PIO_LEN) + m_freem(m_head); + + m_head = NULL; + } + if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && + pi->link_config.link_ok) + callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, + qs, txq->txq_timer.c_cpu); + if (m_head != NULL) + m_freem(m_head); +} + +static int +cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) +{ + struct port_info *pi = qs->port; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + struct buf_ring *br = txq->txq_mr; + int error, avail; + + avail = txq->size - txq->in_use; + TXQ_LOCK_ASSERT(qs); + + /* + * We can only do a direct transmit if the following are true: + * - we aren't coalescing (ring < 3/4 full) + * - the link is up -- checked in caller + * - there are no packets enqueued already + * - there is space in hardware transmit queue + */ + if (check_pkt_coalesce(qs) == 0 && + TXQ_RING_EMPTY(qs) && avail > 4) { + if (t3_encap(qs, &m)) { + if (m != NULL && + (error = drbr_enqueue(ifp, br, m)) != 0) + return (error); + } else { + /* + * We've bypassed the buf ring so we need to update + * the stats directly + */ + 
txq->txq_direct_packets++; + txq->txq_direct_bytes += m->m_pkthdr.len; + /* + ** Send a copy of the frame to the BPF + ** listener and set the watchdog on. + */ + ETHER_BPF_MTAP(ifp, m); + /* + * We sent via PIO, no longer need a copy + */ + if (m->m_pkthdr.len <= PIO_LEN) + m_freem(m); + + } + } else if ((error = drbr_enqueue(ifp, br, m)) != 0) + return (error); + + reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); + if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && + (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) + cxgb_start_locked(qs); + else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) + callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, + qs, txq->txq_timer.c_cpu); return (0); } +int +cxgb_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct sge_qset *qs; + struct port_info *pi = ifp->if_softc; + int error, qidx = pi->first_qset; + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 + ||(!pi->link_config.link_ok)) { + m_freem(m); + return (0); + } + + if (m->m_flags & M_FLOWID) + qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; + + qs = &pi->adapter->sge.qs[qidx]; + + if (TXQ_TRYLOCK(qs)) { + /* XXX running */ + error = cxgb_transmit_locked(ifp, qs, m); + TXQ_UNLOCK(qs); + } else + error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); + return (error); +} +void +cxgb_start(struct ifnet *ifp) +{ + struct port_info *pi = ifp->if_softc; + struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset]; + + if (!pi->link_config.link_ok) + return; + + TXQ_LOCK(qs); + cxgb_start_locked(qs); + TXQ_UNLOCK(qs); +} + +void +cxgb_qflush(struct ifnet *ifp) +{ + /* + * flush any enqueued mbufs in the buf_rings + * and in the transmit queues + * no-op for now + */ + return; +} /** * write_imm - write a packet into a Tx descriptor as immediate data @@ -1492,6 +1811,7 @@ write_imm(struct tx_desc *d, struct mbuf *m, { struct work_request_hdr *from = mtod(m, struct work_request_hdr *); struct work_request_hdr *to = (struct 
work_request_hdr *)d; + uint32_t wr_hi, wr_lo; if (len > WR_LEN) panic("len too big %d\n", len); @@ -1499,11 +1819,12 @@ write_imm(struct tx_desc *d, struct mbuf *m, panic("len too small %d", len); memcpy(&to[1], &from[1], len - sizeof(*from)); - to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | + wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); - wmb(); - to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | + wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); + set_wr_hdr(to, wr_hi, wr_lo); + wmb(); wr_gen2(d, gen); /* @@ -1551,11 +1872,7 @@ addq_exit: mbufq_tail(&q->sendq, m); struct sge_qset *qs = txq_to_qset(q, qid); - printf("stopping q\n"); - setbit(&qs->txq_stopped, qid); - smp_mb(); - if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; @@ -1580,8 +1897,6 @@ reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; - mtx_assert(&q->lock, MA_OWNED); - q->in_use -= reclaim; q->cleaned += reclaim; } @@ -1603,26 +1918,27 @@ immediate(const struct mbuf *m) * descriptor and have no page fragments. 
*/ static int -ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) +ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); - + struct sge_txq *q = &qs->txq[TXQ_CTRL]; + if (__predict_false(!immediate(m))) { m_freem(m); return 0; } - wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); - wrp->wr_lo = htonl(V_WR_TID(q->token)); + wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); + wrp->wrh_lo = htonl(V_WR_TID(q->token)); - mtx_lock(&q->lock); + TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); if (__predict_false(ret)) { if (ret == 1) { - mtx_unlock(&q->lock); + TXQ_UNLOCK(qs); log(LOG_ERR, "no desc available\n"); return (ENOSPC); } @@ -1635,8 +1951,7 @@ again: reclaim_completed_tx_imm(q); q->pidx = 0; q->gen ^= 1; } - mtx_unlock(&q->lock); - wmb(); + TXQ_UNLOCK(qs); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); return (0); @@ -1659,7 +1974,7 @@ restart_ctrlq(void *data, int npending) log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use); - mtx_lock(&q->lock); + TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); while (q->in_use < q->size && @@ -1675,15 +1990,13 @@ again: reclaim_completed_tx_imm(q); } if (!mbufq_empty(&q->sendq)) { setbit(&qs->txq_stopped, TXQ_CTRL); - smp_mb(); if (should_restart_tx(q) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) goto again; q->stops++; } - mtx_unlock(&q->lock); - wmb(); + TXQ_UNLOCK(qs); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } @@ -1695,10 +2008,9 @@ again: reclaim_completed_tx_imm(q); int t3_mgmt_tx(struct adapter *adap, struct mbuf *m) { - return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); + return ctrl_xmit(adap, &adap->sge.qs[0], m); } - /** * free_qset - free the resources of an SGE queue set * @sc: the controller owning the queue set @@ -1708,13 +2020,12 @@ t3_mgmt_tx(struct adapter *adap, struct mbuf *m) * as HW 
contexts, packet buffers, and descriptor rings. Traffic to the * queue set must be quiesced prior to calling this. */ -void +static void t3_free_qset(adapter_t *sc, struct sge_qset *q) { int i; - t3_free_tx_desc_all(&q->txq[TXQ_ETH]); - + reclaim_completed_tx(q, 0, TXQ_ETH); for (i = 0; i < SGE_TXQ_PER_SET; i++) { if (q->txq[i].txq_mr != NULL) buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); @@ -1741,6 +2052,8 @@ t3_free_qset(adapter_t *sc, struct sge_qset *q) } } + mtx_unlock(&q->lock); + MTX_DESTROY(&q->lock); for (i = 0; i < SGE_TXQ_PER_SET; i++) { if (q->txq[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); @@ -1752,7 +2065,6 @@ t3_free_qset(adapter_t *sc, struct sge_qset *q) q->txq[i].desc_map); bus_dma_tag_destroy(q->txq[i].desc_tag); bus_dma_tag_destroy(q->txq[i].entry_tag); - MTX_DESTROY(&q->txq[i].lock); } if (q->txq[i].sdesc) { free(q->txq[i].sdesc, M_DEVBUF); @@ -1789,14 +2101,14 @@ t3_free_sge_resources(adapter_t *sc) { int i, nqsets; -#ifdef IFNET_MULTIQUEUE - panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__); -#endif for (nqsets = i = 0; i < (sc)->params.nports; i++) nqsets += sc->port[i].nqsets; - for (i = 0; i < nqsets; ++i) + for (i = 0; i < nqsets; ++i) { + TXQ_LOCK(&sc->sge.qs[i]); t3_free_qset(sc, &sc->sge.qs[i]); + } + } /** @@ -1865,31 +2177,32 @@ t3_sge_stop(adapter_t *sc) * Returns number of buffers of reclaimed */ void -t3_free_tx_desc(struct sge_txq *q, int reclaimable) +t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) { struct tx_sw_desc *txsd; - unsigned int cidx; - + unsigned int cidx, mask; + struct sge_txq *q = &qs->txq[queue]; + #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); #endif cidx = q->cidx; + mask = q->size - 1; txsd = &q->sdesc[cidx]; - DPRINTF("reclaiming %d WR\n", reclaimable); - mtx_assert(&q->lock, MA_OWNED); + + mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { - DPRINTF("cidx=%d d=%p\n", cidx, txsd); - if 
(txsd->mi.mi_base != NULL) { + prefetch(q->sdesc[(cidx + 1) & mask].m); + prefetch(q->sdesc[(cidx + 2) & mask].m); + + if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } - m_freem_iovec(&txsd->mi); -#if 0 - buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__); -#endif - txsd->mi.mi_base = NULL; + m_freem_list(txsd->m); + txsd->m = NULL; } else q->txq_skipped++; @@ -1903,25 +2216,6 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable) } -void -t3_free_tx_desc_all(struct sge_txq *q) -{ - int i; - struct tx_sw_desc *txsd; - - for (i = 0; i < q->size; i++) { - txsd = &q->sdesc[i]; - if (txsd->mi.mi_base != NULL) { - if (txsd->flags & TX_SW_DESC_MAPPED) { - bus_dmamap_unload(q->entry_tag, txsd->map); - txsd->flags &= ~TX_SW_DESC_MAPPED; - } - m_freem_iovec(&txsd->mi); - bzero(&txsd->mi, sizeof(txsd->mi)); - } - } -} - /** * is_new_response - check if a response is newly written * @r: the response descriptor @@ -1990,7 +2284,7 @@ write_ofld_wr(adapter_t *adap, struct mbuf *m, txqs.compl = 0; write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, - from->wr_hi, from->wr_lo); + from->wrh_hi, from->wrh_lo); } /** @@ -2009,11 +2303,12 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) if (m->m_len <= WR_LEN && nsegs == 0) return (1); /* packet fits as immediate data */ - if (m->m_flags & M_IOVEC) - cnt = mtomv(m)->mv_count; - else - cnt = nsegs; + /* + * This needs to be re-visited for TOE + */ + cnt = nsegs; + /* headers */ flits = m->m_len / 8; @@ -2031,11 +2326,12 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) * Send an offload packet through an SGE offload queue. 
*/ static int -ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) +ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret, nsegs; unsigned int ndesc; unsigned int pidx, gen; + struct sge_txq *q = &qs->txq[TXQ_OFLD]; bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; struct tx_sw_desc *stx; @@ -2045,17 +2341,16 @@ ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) busdma_map_sgl(vsegs, segs, nsegs); stx = &q->sdesc[q->pidx]; - KASSERT(stx->mi.mi_base == NULL, ("mi_base set")); - mtx_lock(&q->lock); -again: reclaim_completed_tx_(q, 16); + TXQ_LOCK(qs); +again: reclaim_completed_tx(qs, 16, TXQ_OFLD); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { if (ret == 1) { printf("no ofld desc avail\n"); m_set_priority(m, ndesc); /* save for restart */ - mtx_unlock(&q->lock); + TXQ_UNLOCK(qs); return (EINTR); } goto again; @@ -2075,7 +2370,7 @@ again: reclaim_completed_tx_(q, 16); ndesc, pidx, skb->len, skb->len - skb->data_len, skb_shinfo(skb)->nr_frags); #endif - mtx_unlock(&q->lock); + TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); check_ring_tx_db(adap, q); @@ -2099,8 +2394,8 @@ restart_offloadq(void *data, int npending) struct tx_sw_desc *stx = &q->sdesc[q->pidx]; int nsegs, cleaned; - mtx_lock(&q->lock); -again: cleaned = reclaim_completed_tx_(q, 16); + TXQ_LOCK(qs); +again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); while ((m = mbufq_peek(&q->sendq)) != NULL) { unsigned int gen, pidx; @@ -2108,8 +2403,6 @@ again: cleaned = reclaim_completed_tx_(q, 16); if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); - smp_mb(); - if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; @@ -2128,16 +2421,15 @@ again: cleaned = reclaim_completed_tx_(q, 16); (void)mbufq_dequeue(&q->sendq); busdma_map_mbufs(&m, q, stx, segs, &nsegs); - mtx_unlock(&q->lock); + TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc, 
segs, nsegs); - mtx_lock(&q->lock); + TXQ_LOCK(qs); } - mtx_unlock(&q->lock); - #if USE_GTS set_bit(TXQ_RUNNING, &q->flags); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif + TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); @@ -2185,9 +2477,9 @@ t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; if (__predict_false(is_ctrl_pkt(m))) - return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); + return ctrl_xmit(adap, qs, m); - return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); + return ofld_xmit(adap, qs, m); } /** @@ -2274,12 +2566,15 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; - int i, header_size, ret = 0; + int i, ret = 0; + + MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); + q->port = pi; for (i = 0; i < SGE_TXQ_PER_SET; i++) { if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, - M_DEVBUF, M_WAITOK, &q->txq[i].lock)) == NULL) { + M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { device_printf(sc->dev, "failed to allocate mbuf ring\n"); goto err; } @@ -2289,7 +2584,11 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, device_printf(sc->dev, "failed to allocate ifq\n"); goto err; } - ifq_init(q->txq[i].txq_ifq, pi->ifp); + ifq_init(q->txq[i].txq_ifq, pi->ifp); + callout_init(&q->txq[i].txq_timer, 1); + callout_init(&q->txq[i].txq_watchdog, 1); + q->txq[i].txq_timer.c_cpu = id % mp_ncpus; + q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; } init_qset_cntxt(q, id); q->idx = id; @@ -2320,11 +2619,6 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, } for (i = 0; i < ntxq; ++i) { - /* - * The control queue always uses immediate data so does not - * need to keep track of any mbufs. - * XXX Placeholder for future TOE support. - */ size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], @@ -2339,17 +2633,12 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, mbufq_init(&q->txq[i].sendq); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; - snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", - device_get_unit(sc->dev), irq_vec_idx, i); - MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); } - - q->txq[TXQ_ETH].port = pi; TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); - TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); - TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); + TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); + TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; @@ -2359,26 +2648,24 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, q->rspq.cidx = 0; q->rspq.size = p->rspq_size; - - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); - q->fl[0].buf_size = (MCLBYTES - header_size); - q->fl[0].zone = zone_clust; - q->fl[0].type = EXT_CLUSTER; + q->fl[0].buf_size = MCLBYTES; + q->fl[0].zone = zone_pack; + q->fl[0].type = EXT_PACKET; #if __FreeBSD_version > 800000 if (cxgb_use_16k_clusters) { - q->fl[1].buf_size = MJUM16BYTES - header_size; + q->fl[1].buf_size = MJUM16BYTES; q->fl[1].zone = zone_jumbo16; q->fl[1].type = EXT_JUMBO16; } else { - q->fl[1].buf_size = MJUM9BYTES - header_size; + q->fl[1].buf_size = MJUM9BYTES; q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } #else - q->fl[1].buf_size = MJUMPAGESIZE - header_size; + q->fl[1].buf_size = MJUMPAGESIZE; q->fl[1].zone = zone_jumbop; 
q->fl[1].type = EXT_JUMBOP; #endif @@ -2466,6 +2753,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, err_unlock: mtx_unlock_spin(&sc->sge.reg_lock); err: + TXQ_LOCK(q); t3_free_qset(sc, q); return (ret); @@ -2504,9 +2792,6 @@ t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) m->m_pkthdr.rcvif = ifp; m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; -#ifndef DISABLE_MBUF_IOVEC - m_explode(m); -#endif /* * adjust after conversion to mbuf chain */ @@ -2515,53 +2800,6 @@ t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) m->m_data += (sizeof(*cpl) + ethpad); } -static void -ext_free_handler(void *arg1, void * arg2) -{ - uintptr_t type = (uintptr_t)arg2; - uma_zone_t zone; - struct mbuf *m; - - m = arg1; - zone = m_getzonefromtype(type); - m->m_ext.ext_type = (int)type; - cxgb_ext_freed++; - cxgb_cache_put(zone, m); -} - -static void -init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) -{ - struct mbuf *m; - int header_size; - - header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + - sizeof(struct m_ext_) + sizeof(uint32_t); - - bzero(cl, header_size); - m = (struct mbuf *)cl; - - cxgb_ext_inited++; - SLIST_INIT(&m->m_pkthdr.tags); - m->m_type = MT_DATA; - m->m_flags = flags | M_NOFREE | M_EXT; - m->m_data = cl + header_size; - m->m_ext.ext_buf = cl; - m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); - m->m_ext.ext_size = m_getsizefromtype(type); - m->m_ext.ext_free = ext_free_handler; -#if __FreeBSD_version >= 800016 - m->m_ext.ext_arg1 = cl; - m->m_ext.ext_arg2 = (void *)(uintptr_t)type; -#else - m->m_ext.ext_args = (void *)(uintptr_t)type; -#endif - m->m_ext.ext_type = EXT_EXTREF; - *(m->m_ext.ref_cnt) = 1; - DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); -} - - /** * get_packet - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet @@ -2578,8 +2816,6 @@ 
init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. */ -#ifdef DISABLE_MBUF_IOVEC - static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, struct t3_mbuf_hdr *mh, struct rsp_desc *r) @@ -2587,49 +2823,69 @@ get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; - struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + int mask, cidx = fl->cidx; + struct rx_sw_desc *sd = &fl->sdesc[cidx]; uint32_t len = G_RSPD_LEN(len_cq); - uint32_t flags = ntohl(r->flags); - uint8_t sopeop = G_RSPD_SOP_EOP(flags); + uint32_t flags = M_EXT; + uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); caddr_t cl; - struct mbuf *m, *m0; + struct mbuf *m; int ret = 0; - - prefetch(sd->rxsd_cl); + + mask = fl->size - 1; + prefetch(fl->sdesc[(cidx + 1) & mask].m); + prefetch(fl->sdesc[(cidx + 2) & mask].m); + prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); + prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); - if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { - if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) + if (recycle_enable && len <= SGE_RX_COPY_THRES && + sopeop == RSPQ_SOP_EOP) { + if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) goto skip_recycle; - cl = mtod(m0, void *); - memcpy(cl, sd->data, len); + cl = mtod(m, void *); + memcpy(cl, sd->rxsd_cl, len); recycle_rx_buf(adap, fl, fl->cidx); - m = m0; - m0->m_len = len; + m->m_pkthdr.len = m->m_len = len; + m->m_flags = 0; + mh->mh_head = mh->mh_tail = m; + ret = 1; + goto done; } else { skip_recycle: - bus_dmamap_unload(fl->entry_tag, sd->map); cl = sd->rxsd_cl; - m = m0 = (struct mbuf *)cl; + m = sd->m; if ((sopeop == RSPQ_SOP_EOP) || (sopeop == 
RSPQ_SOP)) - flags = M_PKTHDR; - init_cluster_mbuf(cl, flags, fl->type, fl->zone); - m0->m_len = len; + flags |= M_PKTHDR; + if (fl->zone == zone_pack) { + m_init(m, zone_pack, MCLBYTES, M_NOWAIT, MT_DATA, flags); + /* + * restore clobbered data pointer + */ + m->m_data = m->m_ext.ext_buf; + } else { + m_cljset(m, cl, fl->type); + m->m_flags = flags; + } + m->m_len = len; } switch(sopeop) { case RSPQ_SOP_EOP: - DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); + ret = 1; + /* FALLTHROUGH */ + case RSPQ_SOP: mh->mh_head = mh->mh_tail = m; m->m_pkthdr.len = len; - ret = 1; break; + case RSPQ_EOP: + ret = 1; + /* FALLTHROUGH */ case RSPQ_NSOP_NEOP: - DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); if (mh->mh_tail == NULL) { log(LOG_ERR, "discarding intermediate descriptor entry\n"); m_freem(m); @@ -2638,105 +2894,17 @@ get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, mh->mh_tail->m_next = m; mh->mh_tail = m; mh->mh_head->m_pkthdr.len += len; - ret = 0; - break; - case RSPQ_SOP: - DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); - m->m_pkthdr.len = len; - mh->mh_head = mh->mh_tail = m; - ret = 0; - break; - case RSPQ_EOP: - DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); - mh->mh_head->m_pkthdr.len += len; - mh->mh_tail->m_next = m; - mh->mh_tail = m; - ret = 1; break; } + if (cxgb_debug) + printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); +done: if (++fl->cidx == fl->size) fl->cidx = 0; return (ret); } -#else - -static int -get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, - struct mbuf **m, struct rsp_desc *r) -{ - - unsigned int len_cq = ntohl(r->len_cq); - struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; - struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; - uint32_t len = G_RSPD_LEN(len_cq); - uint32_t flags = ntohl(r->flags); - uint8_t sopeop = G_RSPD_SOP_EOP(flags); - void *cl; - int ret = 0; - struct mbuf *m0; -#if 0 - if ((sd + 1 )->rxsd_cl) - prefetch((sd + 1)->rxsd_cl); - if ((sd + 2)->rxsd_cl) - prefetch((sd + 2)->rxsd_cl); -#endif - DPRINTF("rx cpu=%d\n", curcpu); - fl->credits--; - bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); - - if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { - if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) - goto skip_recycle; - cl = mtod(m0, void *); - memcpy(cl, sd->data, len); - recycle_rx_buf(adap, fl, fl->cidx); - *m = m0; - } else { - skip_recycle: - bus_dmamap_unload(fl->entry_tag, sd->map); - cl = sd->rxsd_cl; - *m = m0 = (struct mbuf *)cl; - } - - switch(sopeop) { - case RSPQ_SOP_EOP: - DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); - if (cl == sd->rxsd_cl) - init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); - m0->m_len = m0->m_pkthdr.len = len; - ret = 1; - goto done; - break; - case RSPQ_NSOP_NEOP: - DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); - panic("chaining unsupported"); - ret = 0; - break; - case RSPQ_SOP: - DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); - panic("chaining unsupported"); - m_iovinit(m0); - ret = 0; - break; - case RSPQ_EOP: - DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); - panic("chaining unsupported"); - ret = 1; - break; - } - panic("append not supported"); -#if 0 - m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); -#endif -done: - if (++fl->cidx == fl->size) - fl->cidx = 0; - - return (ret); -} -#endif /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response @@ -2795,7 +2963,7 @@ check_ring_db(adapter_t *adap, struct sge_qset *qs, * on this queue. If the system is under memory shortage use a fairly * long delay to help recovery. 
*/ -int +static int process_responses(adapter_t *adap, struct sge_qset *qs, int budget) { struct sge_rspq *rspq = &qs->rspq; @@ -2838,9 +3006,6 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) } else { m = m_gethdr(M_DONTWAIT, MT_DATA); } - - /* XXX m is lost here if rspq->rspq_mbuf is not NULL */ - if (m == NULL) goto no_mem; @@ -2873,18 +3038,14 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) } else if (r->len_cq) { int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; -#ifdef DISABLE_MBUF_IOVEC eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); -#else - eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); -#endif -#ifdef IFNET_MULTIQUEUE - rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; - rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; -#endif + if (eop) { + rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; + rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; + } + ethpad = 2; } else { - DPRINTF("pure response\n"); rspq->pure_rsps++; } skip: @@ -2899,13 +3060,11 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) rspq->gen ^= 1; r = rspq->desc; } - prefetch(r); + if (++rspq->credits >= (rspq->size / 4)) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } - DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); - if (!eth && eop) { rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; /* @@ -2921,8 +3080,6 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) } else if (eth && eop) { struct mbuf *m = rspq->rspq_mh.mh_head; - prefetch(mtod(m, uint8_t *)); - prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES); t3_rx_eth(adap, rspq, m, ethpad); @@ -2951,7 +3108,6 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) struct ifnet *ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); } - DPRINTF("received tunnel packet\n"); rspq->rspq_mh.mh_head = NULL; } @@ -2974,7 +3130,7 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) if (sleeping) 
check_ring_db(adap, qs, sleeping); - smp_mb(); /* commit Tx queue processed updates */ + mb(); /* commit Tx queue processed updates */ if (__predict_false(qs->txq_stopped > 1)) { printf("restarting tx on %p\n", qs); @@ -3068,17 +3224,9 @@ t3_intr_msix(void *data) struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; -#ifndef IFNET_MULTIQUEUE - mtx_lock(&rspq->lock); -#else - if (mtx_trylock(&rspq->lock)) -#endif - { - - if (process_responses_gts(adap, rspq) == 0) - rspq->unhandled_irqs++; - mtx_unlock(&rspq->lock); - } + + if (process_responses_gts(adap, rspq) == 0) + rspq->unhandled_irqs++; } #define QDUMP_SBUF_SIZE 32 * 400 @@ -3357,53 +3505,13 @@ t3_add_attach_sysctls(adapter_t *sc) "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); - SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", + SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "txq_overrun", CTLFLAG_RD, &txq_fills, 0, "#times txq overrun"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "pcpu_cache_enable", - CTLFLAG_RW, &cxgb_pcpu_cache_enable, - 0, "#enable driver local pcpu caches"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "multiq_tx_enable", - CTLFLAG_RW, &multiq_tx_enable, - 0, "enable transmit by multiple tx queues"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "coalesce_tx_enable", - CTLFLAG_RW, &coalesce_tx_enable, - 0, "coalesce small packets in work requests - WARNING ALPHA"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "wakeup_tx_thread", - CTLFLAG_RW, &wakeup_tx_thread, - 0, "wakeup tx thread if no transmitter running"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "cache_alloc", - CTLFLAG_RD, &cxgb_cached_allocations, - 0, "#times a cluster was allocated from cache"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "cached", - CTLFLAG_RD, &cxgb_cached, - 0, "#times a cluster was cached"); - 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "ext_freed", - CTLFLAG_RD, &cxgb_ext_freed, - 0, "#times a cluster was freed through ext_free"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "ext_inited", - CTLFLAG_RD, &cxgb_ext_inited, - 0, "#times a cluster was initialized for ext_free"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "mbufs_outstanding", - CTLFLAG_RD, &cxgb_mbufs_outstanding, - 0, "#mbufs in flight in the driver"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "pack_outstanding", - CTLFLAG_RD, &cxgb_pack_outstanding, - 0, "#packet in flight in the driver"); } @@ -3425,7 +3533,6 @@ sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) return (EINVAL); parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); - PORT_LOCK(p); t3_mac_update_stats(&p->mac); PORT_UNLOCK(p); @@ -3553,9 +3660,9 @@ t3_add_configured_sysctls(adapter_t *sc) SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", CTLFLAG_RD, &txq->txq_skipped, 0, "#tunneled packet descriptors skipped"); - SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced", + SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", CTLFLAG_RD, &txq->txq_coalesced, - 0, "#tunneled packets coalesced"); + "#tunneled packets coalesced"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", CTLFLAG_RD, &txq->txq_enqueued, 0, "#tunneled packets enqueued to hardware"); diff --git a/sys/dev/cxgb/sys/cxgb_support.c b/sys/dev/cxgb/sys/cxgb_support.c deleted file mode 100644 index 55cf0d5..0000000 --- a/sys/dev/cxgb/sys/cxgb_support.c +++ /dev/null @@ -1,305 +0,0 @@ -/************************************************************************** - -Copyright (c) 2007, Chelsio Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Neither the name of the Chelsio Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -***************************************************************************/ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/queue.h> - - -#include <sys/proc.h> -#include <sys/sched.h> -#include <sys/smp.h> -#include <sys/systm.h> -#include <vm/vm.h> -#include <vm/pmap.h> - -#include <cxgb_include.h> -#include <sys/mvec.h> - -extern int cxgb_use_16k_clusters; -int cxgb_pcpu_cache_enable = 1; - -struct buf_stack { - caddr_t *bs_stack; - volatile int bs_head; - int bs_size; -}; - -static __inline int -buf_stack_push(struct buf_stack *bs, caddr_t buf) -{ - if (bs->bs_head + 1 >= bs->bs_size) - return (ENOSPC); - - bs->bs_stack[++(bs->bs_head)] = buf; - return (0); -} - -static __inline caddr_t -buf_stack_pop(struct buf_stack *bs) -{ - if (bs->bs_head < 0) - return (NULL); - - return (bs->bs_stack[(bs->bs_head)--]); -} - -/* - * Stack is full - * - */ -static __inline int -buf_stack_avail(struct buf_stack 
*bs) -{ - return (bs->bs_size - bs->bs_head - 1); -} - -struct cxgb_cache_pcpu { - struct buf_stack ccp_jumbo_free; - struct buf_stack ccp_cluster_free; - uma_zone_t ccp_jumbo_zone; -}; - -struct cxgb_cache_system { - struct cxgb_cache_pcpu ccs_array[0]; -} *cxgb_caches; - -static int -buf_stack_init(struct buf_stack *bs, int size) -{ - bs->bs_size = size; - bs->bs_head = -1; - if((bs->bs_stack = malloc(sizeof(caddr_t)*size, M_DEVBUF, M_NOWAIT)) == NULL) - return (ENOMEM); - - return (0); -} - -static void -buf_stack_deinit(struct buf_stack *bs) -{ - if (bs->bs_stack != NULL) - free(bs->bs_stack, M_DEVBUF); -} - -static int -cxgb_cache_pcpu_init(struct cxgb_cache_pcpu *ccp) -{ - int err; - - if ((err = buf_stack_init(&ccp->ccp_jumbo_free, (JUMBO_Q_SIZE >> 2)))) - return (err); - - if ((err = buf_stack_init(&ccp->ccp_cluster_free, (FL_Q_SIZE >> 2)))) - return (err); - -#if __FreeBSD_version > 800000 - if (cxgb_use_16k_clusters) - ccp->ccp_jumbo_zone = zone_jumbo16; - else - ccp->ccp_jumbo_zone = zone_jumbo9; -#else - ccp->ccp_jumbo_zone = zone_jumbop; -#endif - return (0); -} - -static void -cxgb_cache_pcpu_deinit(struct cxgb_cache_pcpu *ccp) -{ - void *cl; - - while ((cl = buf_stack_pop(&ccp->ccp_jumbo_free)) != NULL) - uma_zfree(ccp->ccp_jumbo_zone, cl); - while ((cl = buf_stack_pop(&ccp->ccp_cluster_free)) != NULL) - uma_zfree(zone_clust, cl); - - buf_stack_deinit(&ccp->ccp_jumbo_free); - buf_stack_deinit(&ccp->ccp_cluster_free); - -} - -static int inited = 0; - -int -cxgb_cache_init(void) -{ - int i, err; - - if (inited++ > 0) - return (0); - - if ((cxgb_caches = malloc(sizeof(struct cxgb_cache_pcpu)*mp_ncpus, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) - return (ENOMEM); - - for (i = 0; i < mp_ncpus; i++) - if ((err = cxgb_cache_pcpu_init(&cxgb_caches->ccs_array[i]))) - goto err; - - return (0); -err: - cxgb_cache_flush(); - - return (err); -} - -void -cxgb_cache_flush(void) -{ - int i; - - if (--inited > 0) - return; - - if (cxgb_caches == NULL) - return; - - for (i 
= 0; i < mp_ncpus; i++) - cxgb_cache_pcpu_deinit(&cxgb_caches->ccs_array[i]); - - free(cxgb_caches, M_DEVBUF); - cxgb_caches = NULL; -} - -caddr_t -cxgb_cache_get(uma_zone_t zone) -{ - caddr_t cl = NULL; - struct cxgb_cache_pcpu *ccp; - - if (cxgb_pcpu_cache_enable) { - critical_enter(); - ccp = &cxgb_caches->ccs_array[curcpu]; - if (zone == zone_clust) { - cl = buf_stack_pop(&ccp->ccp_cluster_free); - } else if (zone == ccp->ccp_jumbo_zone) { - cl = buf_stack_pop(&ccp->ccp_jumbo_free); - } - critical_exit(); - } - - if (cl == NULL) - cl = uma_zalloc(zone, M_NOWAIT); - else - cxgb_cached_allocations++; - - return (cl); -} - -void -cxgb_cache_put(uma_zone_t zone, void *cl) -{ - struct cxgb_cache_pcpu *ccp; - int err = ENOSPC; - - if (cxgb_pcpu_cache_enable) { - critical_enter(); - ccp = &cxgb_caches->ccs_array[curcpu]; - if (zone == zone_clust) { - err = buf_stack_push(&ccp->ccp_cluster_free, cl); - } else if (zone == ccp->ccp_jumbo_zone){ - err = buf_stack_push(&ccp->ccp_jumbo_free, cl); - } - critical_exit(); - } - - if (err) - uma_zfree(zone, cl); - else - cxgb_cached++; -} - -void -cxgb_cache_refill(void) -{ - struct cxgb_cache_pcpu *ccp; - caddr_t vec[8]; - uma_zone_t zone; - int i, count; - - - return; -restart: - critical_enter(); - ccp = &cxgb_caches->ccs_array[curcpu]; - zone = ccp->ccp_jumbo_zone; - if (!buf_stack_avail(&ccp->ccp_jumbo_free) && - !buf_stack_avail(&ccp->ccp_cluster_free)) { - critical_exit(); - return; - } - critical_exit(); - - - - for (i = 0; i < 8; i++) - if ((vec[i] = uma_zalloc(zone, M_NOWAIT)) == NULL) - goto free; - - critical_enter(); - ccp = &cxgb_caches->ccs_array[curcpu]; - for (i = 0; i < 8 && buf_stack_avail(&ccp->ccp_jumbo_free); i++) - if (buf_stack_push(&ccp->ccp_jumbo_free, vec[i])) - break; - critical_exit(); - - for (; i < 8; i++) - uma_zfree(zone, vec[i]); - - - - zone = zone_clust; - for (i = 0; i < 8; i++) - if ((vec[i] = uma_zalloc(zone, M_NOWAIT)) == NULL) - goto free; - - critical_enter(); - ccp = 
&cxgb_caches->ccs_array[curcpu]; - for (i = 0; i < 8 && buf_stack_avail(&ccp->ccp_cluster_free); i++) - if (buf_stack_push(&ccp->ccp_cluster_free, vec[i])) - break; - critical_exit(); - - for (; i < 8; i++) - uma_zfree(zone, vec[i]); - - goto restart; - - -free: - count = i; - for (; i < count; i++) - uma_zfree(zone, vec[i]); -} - diff --git a/sys/dev/cxgb/sys/mvec.h b/sys/dev/cxgb/sys/mvec.h index 855c039..e031948 100644 --- a/sys/dev/cxgb/sys/mvec.h +++ b/sys/dev/cxgb/sys/mvec.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright (c) 2007, Kip Macy kmacy@freebsd.org + * Copyright (c) 2007,2009 Kip Macy kmacy@freebsd.org * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,156 +33,35 @@ #define _MVEC_H_ #include <machine/bus.h> -int cxgb_cache_init(void); - -void cxgb_cache_flush(void); - -caddr_t cxgb_cache_get(uma_zone_t zone); - -void cxgb_cache_put(uma_zone_t zone, void *cl); - -void cxgb_cache_refill(void); - -extern int cxgb_cached_allocations; -extern int cxgb_cached; -extern int cxgb_ext_freed; -extern int cxgb_mbufs_outstanding; -extern int cxgb_pack_outstanding; - -#define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat)) -#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */ #define M_DDP 0x200000 /* direct data placement mbuf */ #define EXT_PHYS 10 /* physical/bus address */ -/* - * duplication from mbuf.h - can't use directly because - * m_ext is a define - */ -struct m_ext_ { - caddr_t ext_buf; /* start of buffer */ - void (*ext_free) /* free routine if not the usual */ - (void *, void *); -#if __FreeBSD_version >= 800016 - void *ext_arg1; /* optional argument pointer */ - void *ext_arg2; /* optional argument pointer */ -#else - void *ext_args; /* optional argument pointer */ -#endif - u_int ext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ - int ext_type; /* type of 
external storage */ -}; - -#define MT_IOVEC 9 -#define MT_CLIOVEC 10 - -#define EXT_IOVEC 8 -#define EXT_CLIOVEC 9 -#define EXT_JMPIOVEC 10 - #define m_cur_offset m_ext.ext_size /* override to provide ddp offset */ #define m_seq m_pkthdr.csum_data /* stored sequence */ #define m_ddp_gl m_ext.ext_buf /* ddp list */ #define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */ #define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */ -extern uma_zone_t zone_miovec; - -struct mbuf_iovec { - struct m_ext_ mi_ext; - uint32_t mi_flags; - uint32_t mi_len; - caddr_t mi_data; - uint16_t mi_tso_segsz; - uint16_t mi_ether_vtag; - uint16_t mi_rss_hash; /* this can be shrunk down if something comes - * along that needs 1 byte - */ - uint16_t mi_pad; - struct mbuf *mi_mbuf; /* need to be able to handle the @#$@@#%$ing packet zone */ -#define mi_size mi_ext.ext_size -#define mi_base mi_ext.ext_buf -#define mi_args mi_ext.ext_args -#define mi_size mi_ext.ext_size -#define mi_size mi_ext.ext_size -#define mi_refcnt mi_ext.ref_cnt -#define mi_ext_free mi_ext.ext_free -#define mi_ext_flags mi_ext.ext_flags -#define mi_type mi_ext.ext_type -}; - -#define MIOVBYTES 512 -#define MAX_MBUF_IOV ((MHLEN-8)/sizeof(struct mbuf_iovec)) -#define MAX_MIOVEC_IOV ((MIOVBYTES-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec)) -#define MAX_CL_IOV ((MCLBYTES-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec)) -#define MAX_PAGE_IOV ((MJUMPAGESIZE-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec)) - -struct mbuf_vec { - uint16_t mv_first; /* first valid cluster */ - uint16_t mv_count; /* # of clusters */ - uint32_t mv_flags; /* flags for iovec */ - struct mbuf_iovec mv_vec[0]; /* depends on whether or not this is in a cluster or an mbuf */ -}; -void mi_init(void); -void mi_deinit(void); - -int _m_explode(struct mbuf *); -void mb_free_vec(struct mbuf *m); - -static __inline void -m_iovinit(struct mbuf *m) -{ - struct mbuf_vec *mv = 
mtomv(m); - - mv->mv_first = mv->mv_count = 0; - m->m_pkthdr.len = m->m_len = 0; - m->m_flags |= M_IOVEC; -} - -static __inline void -m_iovappend(struct mbuf *m, uint8_t *cl, int size, int len, caddr_t data, volatile uint32_t *ref) -{ - struct mbuf_vec *mv = mtomv(m); - struct mbuf_iovec *iov; - int idx = mv->mv_first + mv->mv_count; - - KASSERT(idx <= MAX_MBUF_IOV, ("tried to append too many clusters to mbuf iovec")); - if ((m->m_flags & M_EXT) != 0) - panic("invalid flags in %s", __func__); - - if (mv->mv_count == 0) - m->m_data = data; - - iov = &mv->mv_vec[idx]; - iov->mi_type = m_gettype(size); - iov->mi_base = cl; - iov->mi_len = len; - iov->mi_data = data; - iov->mi_refcnt = ref; - m->m_pkthdr.len += len; - m->m_len += len; - mv->mv_count++; -} - -static __inline int -m_explode(struct mbuf *m) -{ - if ((m->m_flags & M_IOVEC) == 0) - return (0); - - return _m_explode(m); -} - static __inline void -busdma_map_mbuf_fast(struct mbuf *m, bus_dma_segment_t *seg) +busdma_map_mbuf_fast(struct sge_txq *txq, struct tx_sw_desc *txsd, + struct mbuf *m, bus_dma_segment_t *seg) { +#if defined(__i386__) || defined(__amd64__) seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t)); seg->ds_len = m->m_len; +#else + int nsegstmp; + + bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m, seg, + &nsegstmp, 0); +#endif } -int busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs); -int busdma_map_sg_vec(struct mbuf **m, struct mbuf **mp, bus_dma_segment_t *segs, int count); -static __inline int busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count) +int busdma_map_sg_collapse(struct sge_txq *txq, struct tx_sw_desc *txsd, + struct mbuf **m, bus_dma_segment_t *segs, int *nsegs); +void busdma_map_sg_vec(struct sge_txq *txq, struct tx_sw_desc *txsd, struct mbuf *m, bus_dma_segment_t *segs, int *nsegs); +static __inline int +busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count) { while (count--) { segs->ds_addr = 
pmap_kextract((vm_offset_t)vsegs->ds_addr); @@ -193,156 +72,19 @@ static __inline int busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t * return (0); } -struct mbuf *mi_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m); -void *mcl_alloc(int seg_count, int *type); - -void mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx); - -static __inline void -mi_collapse_sge(struct mbuf_iovec *mi, bus_dma_segment_t *seg) -{ - mi->mi_flags = 0; - mi->mi_base = (caddr_t)seg->ds_addr; - mi->mi_len = seg->ds_len; - mi->mi_size = 0; - mi->mi_type = EXT_PHYS; - mi->mi_refcnt = NULL; -} - static __inline void -m_free_iovec(struct mbuf *m, int type) +m_freem_list(struct mbuf *m) { - int i; - struct mbuf_vec *mv; - struct mbuf_iovec *mi; - - mv = mtomv(m); - mi = mv->mv_vec; - for (i = 0; i < mv->mv_count; i++, mi++) { - DPRINTF("freeing buf=%d of %d\n", i, mv->mv_count); - mb_free_ext_fast(mi, mi->mi_type, i); - } - switch (type) { - case EXT_IOVEC: - uma_zfree(zone_miovec, m); - break; - case EXT_CLIOVEC: - cxgb_cache_put(zone_clust, m); - break; - case EXT_JMPIOVEC: - cxgb_cache_put(zone_jumbop, m); - break; - default: - panic("unexpected type %d\n", type); - } -} - -static __inline void -m_freem_iovec(struct mbuf_iovec *mi) -{ - struct mbuf *m = (struct mbuf *)mi->mi_base; - - switch (mi->mi_type) { - case EXT_MBUF: -#ifdef PIO_LEN - KASSERT(m->m_pkthdr.len > PIO_LEN, ("freeing PIO buf")); -#endif - KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf")); - KASSERT(m->m_next == NULL, ("freeing chain")); - cxgb_mbufs_outstanding--; - m_free_fast(m); - break; - case EXT_PACKET: - cxgb_pack_outstanding--; - m_free(mi->mi_mbuf); - break; - case EXT_IOVEC: - case EXT_CLIOVEC: - case EXT_JMPIOVEC: - m = (struct mbuf *)mi->mi_base; - m_free_iovec(m, mi->mi_type); - break; - case EXT_CLUSTER: - case EXT_JUMBOP: - case EXT_JUMBO9: - case EXT_JUMBO16: - case EXT_SFBUF: - case EXT_NET_DRV: - case EXT_MOD_TYPE: - case EXT_DISPOSABLE: - case EXT_EXTREF: - 
mb_free_ext_fast(mi, mi->mi_type, -1); - break; - default: - panic("unknown miov type: %d\n", mi->mi_type); - break; - } -} - -static __inline uma_zone_t -m_getzonefromtype(int type) -{ - uma_zone_t zone; - - switch (type) { - case EXT_MBUF: - zone = zone_mbuf; - break; - case EXT_CLUSTER: - zone = zone_clust; - break; -#if MJUMPAGESIZE != MCLBYTES - case EXT_JUMBOP: - zone = zone_jumbop; - break; -#endif - case EXT_JUMBO9: - zone = zone_jumbo9; - break; - case EXT_JUMBO16: - zone = zone_jumbo16; - break; -#ifdef PACKET_ZONE - case EXT_PACKET: - zone = zone_pack; - break; -#endif - default: - panic("%s: invalid cluster type %d", __func__, type); - } - return (zone); -} - -static __inline int -m_getsizefromtype(int type) -{ - int size; - - switch (type) { - case EXT_MBUF: - size = MSIZE; - break; - case EXT_CLUSTER: - case EXT_PACKET: - size = MCLBYTES; - break; -#if MJUMPAGESIZE != MCLBYTES - case EXT_JUMBOP: - size = MJUMPAGESIZE; - break; -#endif - case EXT_JUMBO9: - size = MJUM9BYTES; - break; - case EXT_JUMBO16: - size = MJUM16BYTES; - break; - default: - panic("%s: unrecognized cluster type %d", __func__, type); - } - return (size); + struct mbuf *n; + + while (m != NULL) { + n = m->m_nextpkt; + if (n != NULL) + prefetch(n); + m_freem(m); + m = n; + } } -void dump_mi(struct mbuf_iovec *mi); #endif /* _MVEC_H_ */ diff --git a/sys/dev/cxgb/sys/uipc_mvec.c b/sys/dev/cxgb/sys/uipc_mvec.c index eb91e97..fa6f0ed 100644 --- a/sys/dev/cxgb/sys/uipc_mvec.c +++ b/sys/dev/cxgb/sys/uipc_mvec.c @@ -48,8 +48,6 @@ __FBSDID("$FreeBSD$"); #include <cxgb_include.h> #include <sys/mvec.h> -#include "opt_zero.h" - #include <vm/vm.h> #include <vm/vm_page.h> #include <vm/pmap.h> @@ -60,180 +58,47 @@ __FBSDID("$FreeBSD$"); #define M_SANITY(a, b) #endif -#define MAX_BUFS 36 -#define MAX_HVEC 8 - -extern uint32_t collapse_free; -extern uint32_t mb_free_vec_free; - -uma_zone_t zone_miovec; -static int mi_inited = 0; -int cxgb_mbufs_outstanding = 0; -int cxgb_pack_outstanding = 0; - 
-void -mi_init(void) -{ - if (mi_inited > 0) - return; - else - mi_inited++; - zone_miovec = uma_zcreate("MBUF IOVEC", MIOVBYTES, - NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET); -} - -void -mi_deinit(void) -{ - mi_inited--; - if (mi_inited == 0) - uma_zdestroy(zone_miovec); -} - -void -dump_mi(struct mbuf_iovec *mi) -{ - int i; - struct mbuf_vec *mv; - - printf("mi_flags=0x%08x mi_base=%p mi_data=%p mi_len=%d mi_type=%d\n", - mi->mi_flags, mi->mi_base, mi->mi_data, mi->mi_len, mi->mi_type); - - if (mi->mi_type == EXT_CLIOVEC || - mi->mi_type == EXT_IOVEC) { - mv = mtomv((struct mbuf *)mi->mi_base); - mi = mv->mv_vec; - for (i = 0; i < mv->mv_count; i++, mi++) - dump_mi(mi); - - } -} - -static __inline struct mbuf * -_mcl_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m) -{ - struct mbuf *n = m->m_next; - - prefetch(n); - - mi->mi_flags = m->m_flags; - mi->mi_len = m->m_len; - mi->mi_mbuf = NULL; - - if (m->m_flags & M_PKTHDR) { - mi->mi_ether_vtag = m->m_pkthdr.ether_vtag; - mi->mi_tso_segsz = m->m_pkthdr.tso_segsz; -#ifdef IFNET_MULTIQUEUE - mi->mi_rss_hash = m->m_pkthdr.flowid; -#endif - if(!SLIST_EMPTY(&m->m_pkthdr.tags)) - m_tag_delete_chain(m, NULL); - } - if (m->m_type != MT_DATA) { - mi->mi_data = NULL; - mi->mi_base = (caddr_t)m; - /* - * XXX JMPIOVEC - */ - mi->mi_size = (m->m_type == EXT_CLIOVEC) ? 
MCLBYTES : MIOVBYTES; - mi->mi_type = m->m_type; - mi->mi_len = m->m_pkthdr.len; - KASSERT(mi->mi_len, ("empty packet")); - mi->mi_refcnt = NULL; - } else if (m->m_flags & M_EXT) { - memcpy(&mi->mi_ext, &m->m_ext, sizeof(struct m_ext_)); - mi->mi_data = m->m_data; - mi->mi_base = m->m_ext.ext_buf; - mi->mi_type = m->m_ext.ext_type; - mi->mi_size = m->m_ext.ext_size; - mi->mi_refcnt = m->m_ext.ref_cnt; - if (m->m_ext.ext_type == EXT_PACKET) { - mi->mi_mbuf = m; -#ifdef INVARIANTS - cxgb_pack_outstanding++; -#endif - } - } else { - mi->mi_base = (caddr_t)m; - mi->mi_data = m->m_data; - mi->mi_size = MSIZE; - mi->mi_type = EXT_MBUF; - mi->mi_refcnt = NULL; -#ifdef INVARIANTS - cxgb_mbufs_outstanding++; -#endif - } - KASSERT(mi->mi_len != 0, ("miov has len 0")); - KASSERT(mi->mi_type > 0, ("mi_type is invalid")); - KASSERT(mi->mi_base, ("mi_base is invalid")); - return (n); -} - -struct mbuf * -mi_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m) -{ - return _mcl_collapse_mbuf(mi, m); -} - -void * -mcl_alloc(int seg_count, int *type) -{ - uma_zone_t zone; - - if (seg_count > MAX_CL_IOV) { - zone = zone_jumbop; - *type = EXT_JMPIOVEC; - } else if (seg_count > MAX_MIOVEC_IOV) { - zone = zone_clust; - *type = EXT_CLIOVEC; - } else { - *type = EXT_IOVEC; - zone = zone_miovec; - } - return uma_zalloc_arg(zone, NULL, M_NOWAIT); -} - int -busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs) +busdma_map_sg_collapse(struct sge_txq *txq, struct tx_sw_desc *txsd, + struct mbuf **m, bus_dma_segment_t *segs, int *nsegs) { - struct mbuf *m0, *mhead, *n = *m; - struct mbuf_iovec *mi; - struct mbuf *marray[TX_MAX_SEGS]; - int i, type, seg_count, defragged = 0, err = 0; - struct mbuf_vec *mv; - int skipped, freed; + struct mbuf *n = *m; + int seg_count, defragged = 0, err = 0; + bus_dma_segment_t *psegs; KASSERT(n->m_pkthdr.len, ("packet has zero header len")); if (n->m_pkthdr.len <= PIO_LEN) return (0); retry: + psegs = segs; seg_count = 0; if (n->m_next == 
NULL) { - busdma_map_mbuf_fast(n, segs); + busdma_map_mbuf_fast(txq, txsd, n, segs); *nsegs = 1; return (0); } - skipped = freed = 0; +#if defined(__i386__) || defined(__amd64__) while (n && seg_count < TX_MAX_SEGS) { - marray[seg_count] = n; - /* * firmware doesn't like empty segments */ - if (__predict_true(n->m_len != 0)) + if (__predict_true(n->m_len != 0)) { seg_count++; - else - skipped++; - + busdma_map_mbuf_fast(txq, txsd, n, psegs); + psegs++; + } n = n->m_next; } +#else + err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m, segs, + &seg_count, 0); +#endif if (seg_count == 0) { if (cxgb_debug) printf("empty segment chain\n"); err = EFBIG; goto err_out; - } else if (seg_count >= TX_MAX_SEGS) { + } else if (err == EFBIG || seg_count >= TX_MAX_SEGS) { if (cxgb_debug) printf("mbuf chain too long: %d max allowed %d\n", seg_count, TX_MAX_SEGS); @@ -251,171 +116,17 @@ retry: goto err_out; } - if ((m0 = mcl_alloc(seg_count, &type)) == NULL) { - err = ENOMEM; - goto err_out; - } - - memcpy(m0, *m, sizeof(struct m_hdr) + sizeof(struct pkthdr)); - m0->m_type = type; - KASSERT(m0->m_pkthdr.len, ("empty packet being marshalled")); - mv = mtomv(m0); - mv->mv_count = seg_count; - mv->mv_first = 0; - for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++) { - n = marray[i]; - busdma_map_mbuf_fast(n, segs); - _mcl_collapse_mbuf(mi, n); - } - n = *m; - while (n) { - if (n->m_len == 0) - /* do nothing - free if mbuf or cluster */; - else if ((n->m_flags & M_EXT) == 0) { - goto skip; - } else if ((n->m_flags & M_EXT) && - (n->m_ext.ext_type == EXT_PACKET)) { - goto skip; - } else if (n->m_flags & M_NOFREE) - goto skip; - else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT) - n->m_flags &= ~M_EXT; - mhead = n->m_next; - m_free(n); - n = mhead; - freed++; - continue; - skip: - /* - * is an immediate mbuf or is from the packet zone - */ - n = n->m_next; - } *nsegs = seg_count; - *m = m0; - DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m); - return 
(0); -err_out: - m_freem(*m); - *m = NULL; +err_out: return (err); } -int -busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, - bus_dma_segment_t *segs, int pkt_count) -{ - struct mbuf *m0, **mp; - struct mbuf_iovec *mi; - struct mbuf_vec *mv; - int i, type; - - if ((m0 = mcl_alloc(pkt_count, &type)) == NULL) - return (ENOMEM); - - memcpy(m0, *m, sizeof(struct m_hdr) + - sizeof(struct pkthdr)); - m0->m_type = type; - mv = mtomv(m0); - mv->mv_count = pkt_count; - mv->mv_first = 0; - for (mp = m, i = 0, mi = mv->mv_vec; i < pkt_count; - mp++, segs++, mi++, i++) { - busdma_map_mbuf_fast(*mp, segs); - _mcl_collapse_mbuf(mi, *mp); - KASSERT(mi->mi_len, ("empty packet")); - } - - for (mp = m, i = 0; i < pkt_count; i++, mp++) { - if ((((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT) - && ((*mp)->m_ext.ext_type != EXT_PACKET)) { - (*mp)->m_flags &= ~M_EXT; - m_free(*mp); - } - } - - *mret = m0; - return (0); -} - void -mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx) +busdma_map_sg_vec(struct sge_txq *txq, struct tx_sw_desc *txsd, + struct mbuf *m, bus_dma_segment_t *segs, int *nsegs) { - int dofree; - caddr_t cl; - - cl = mi->mi_base; - switch (type) { - case EXT_PACKET: -#ifdef INVARIANTS - cxgb_pack_outstanding--; -#endif - m_free(mi->mi_mbuf); - return; - case EXT_MBUF: - KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf")); -#ifdef INVARIANTS - cxgb_mbufs_outstanding--; -#endif - m_free_fast((struct mbuf *)cl); - return; - default: - break; - } - /* Account for lazy ref count assign. 
*/ - dofree = (mi->mi_refcnt == NULL); - if (dofree == 0) { - if (*(mi->mi_refcnt) == 1 || - atomic_fetchadd_int(mi->mi_refcnt, -1) == 1) - dofree = 1; - } - if (dofree == 0) - return; - - switch (type) { - case EXT_CLUSTER: - cxgb_cache_put(zone_clust, cl); - break; - case EXT_JUMBOP: - cxgb_cache_put(zone_jumbop, cl); - break; - case EXT_JUMBO9: - cxgb_cache_put(zone_jumbo9, cl); - break; - case EXT_JUMBO16: - cxgb_cache_put(zone_jumbo16, cl); - break; - case EXT_SFBUF: - case EXT_NET_DRV: - case EXT_MOD_TYPE: - case EXT_DISPOSABLE: - *(mi->mi_refcnt) = 0; - uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, - mi->mi_ext.ref_cnt)); - /* FALLTHROUGH */ - case EXT_EXTREF: - KASSERT(mi->mi_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); -#if __FreeBSD_version >= 800016 - (*(mi->mi_ext.ext_free))(mi->mi_ext.ext_arg1, - mi->mi_ext.ext_arg2); -#else - (*(mi->mi_ext.ext_free))(mi->mi_ext.ext_buf, - mi->mi_ext.ext_args); -#endif - break; - default: - dump_mi(mi); - panic("unknown mv type in m_free_vec type=%d idx=%d", type, idx); - break; - } + for (*nsegs = 0; m != NULL ; segs++, *nsegs += 1, m = m->m_nextpkt) + busdma_map_mbuf_fast(txq, txsd, m, segs); } -int -_m_explode(struct mbuf *m) -{ - panic("IMPLEMENT ME!!!"); -} - - diff --git a/sys/modules/cxgb/cxgb/Makefile b/sys/modules/cxgb/cxgb/Makefile index 15a7eb7..8a5791c 100644 --- a/sys/modules/cxgb/cxgb/Makefile +++ b/sys/modules/cxgb/cxgb/Makefile @@ -9,7 +9,7 @@ SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c SRCS+= cxgb_sge.c cxgb_offload.c cxgb_tn1010.c SRCS+= device_if.h bus_if.h pci_if.h SRCS+= opt_inet.h opt_zero.h opt_sched.h -SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c +SRCS+= uipc_mvec.c CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB} CFLAGS+= -DDISABLE_MBUF_IOVEC |