Diffstat (limited to 'sys/dev/vmware')
-rw-r--r--   sys/dev/vmware/vmxnet3/if_vmx.c    | 718
-rw-r--r--   sys/dev/vmware/vmxnet3/if_vmxreg.h |  27
-rw-r--r--   sys/dev/vmware/vmxnet3/if_vmxvar.h |  52
3 files changed, 704 insertions, 93 deletions
diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c
index 21f0947..3acc672 100644
--- a/sys/dev/vmware/vmxnet3/if_vmx.c
+++ b/sys/dev/vmware/vmxnet3/if_vmx.c
@@ -32,6 +32,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>

 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -67,9 +69,6 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"

-/* Always enable for now - useful for queue hangs. */
-#define VMXNET3_DEBUG_SYSCTL
-
 #ifdef VMXNET3_FAILPOINTS
 #include <sys/fail.h>
 static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
@@ -86,6 +85,7 @@ static int vmxnet3_alloc_resources(struct vmxnet3_softc *);
 static void vmxnet3_free_resources(struct vmxnet3_softc *);
 static int vmxnet3_check_version(struct vmxnet3_softc *);
 static void vmxnet3_initial_config(struct vmxnet3_softc *);
+static void vmxnet3_check_multiqueue(struct vmxnet3_softc *);

 static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
 static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
@@ -102,6 +102,13 @@ static void vmxnet3_free_interrupt(struct vmxnet3_softc *,
        struct vmxnet3_interrupt *);
 static void vmxnet3_free_interrupts(struct vmxnet3_softc *);

+#ifndef VMXNET3_LEGACY_TX
+static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
+static void vmxnet3_start_taskqueue(struct vmxnet3_softc *);
+static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
+static void vmxnet3_free_taskqueue(struct vmxnet3_softc *);
+#endif
+
 static int vmxnet3_init_rxq(struct vmxnet3_softc *, int);
 static int vmxnet3_init_txq(struct vmxnet3_softc *, int);
 static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
@@ -120,6 +127,7 @@ static void vmxnet3_free_queue_data(struct vmxnet3_softc *);
 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
 static void vmxnet3_reinit_interface(struct vmxnet3_softc *);
+static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
 static void vmxnet3_free_data(struct vmxnet3_softc *);
@@ -150,13 +158,24 @@ static int vmxnet3_reinit(struct vmxnet3_softc *);
 static void vmxnet3_init_locked(struct vmxnet3_softc *);
 static void vmxnet3_init(void *);

-static int vmxnet3_txq_offload_ctx(struct mbuf *, int *, int *, int *);
+static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
+       int *, int *, int *);
 static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
        bus_dmamap_t, bus_dma_segment_t [], int *);
 static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
 static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
+static void vmxnet3_txq_update_pending(struct vmxnet3_txqueue *);
+#ifdef VMXNET3_LEGACY_TX
 static void vmxnet3_start_locked(struct ifnet *);
 static void vmxnet3_start(struct ifnet *);
+#else
+static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
+       struct mbuf *);
+static int vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
+static void vmxnet3_txq_tq_deferred(void *, int);
+#endif
+static void vmxnet3_txq_start(struct vmxnet3_txqueue *);
+static void vmxnet3_tx_start_all(struct vmxnet3_softc *);

 static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
        uint16_t);
@@ -166,7 +185,16 @@ static void vmxnet3_set_rxfilter(struct vmxnet3_softc *);
 static int vmxnet3_change_mtu(struct vmxnet3_softc *, int);
 static int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
+#ifndef VMXNET3_LEGACY_TX
+static void vmxnet3_qflush(struct ifnet *);
+#endif
+
 static int vmxnet3_watchdog(struct vmxnet3_txqueue *);
+static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
+static void vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
+       struct vmxnet3_txq_stats *);
+static void vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
+       struct vmxnet3_rxq_stats *);
 static void vmxnet3_tick(void *);
 static void vmxnet3_link_status(struct vmxnet3_softc *);
 static void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
@@ -211,6 +239,12 @@ typedef enum {
 static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);

 /* Tunables. */
+static int vmxnet3_mq_disable = 0;
+TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
+static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
+TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
+static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
+TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
 static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
 TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
 static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
@@ -280,10 +314,18 @@ vmxnet3_attach(device_t dev)
    if (error)
        goto fail;

+#ifndef VMXNET3_LEGACY_TX
+   error = vmxnet3_alloc_taskqueue(sc);
+   if (error)
+       goto fail;
+#endif
+
    error = vmxnet3_alloc_interrupts(sc);
    if (error)
        goto fail;

+   vmxnet3_check_multiqueue(sc);
+
    error = vmxnet3_alloc_data(sc);
    if (error)
        goto fail;
@@ -300,7 +342,9 @@ vmxnet3_attach(device_t dev)
    }

    vmxnet3_setup_sysctl(sc);
-   vmxnet3_link_status(sc);
+#ifndef VMXNET3_LEGACY_TX
+   vmxnet3_start_taskqueue(sc);
+#endif

 fail:
    if (error)
@@ -319,11 +363,16 @@ vmxnet3_detach(device_t dev)
    ifp = sc->vmx_ifp;

    if (device_is_attached(dev)) {
-       ether_ifdetach(ifp);
        VMXNET3_CORE_LOCK(sc);
        vmxnet3_stop(sc);
        VMXNET3_CORE_UNLOCK(sc);
+       callout_drain(&sc->vmx_tick);
+#ifndef VMXNET3_LEGACY_TX
+       vmxnet3_drain_taskqueue(sc);
+#endif
+
+       ether_ifdetach(ifp);
    }

    if (sc->vmx_vlan_attach != NULL) {
@@ -335,6 +384,9 @@ vmxnet3_detach(device_t dev)
        sc->vmx_vlan_detach = NULL;
    }

+#ifndef VMXNET3_LEGACY_TX
+   vmxnet3_free_taskqueue(sc);
+#endif
    vmxnet3_free_interrupts(sc);

    if (ifp != NULL) {
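The three multiqueue tunables introduced above are TUNABLE_INTs, so they are read once from the loader environment at boot rather than being settable at runtime. A hypothetical /boot/loader.conf fragment for a 4-vCPU guest (the values are illustrative only; the driver clamps them to mp_ncpus and the device maxima):

    hw.vmx.txnqueue="4"             # default is VMXNET3_DEF_TX_QUEUES (8)
    hw.vmx.rxnqueue="4"             # default is VMXNET3_DEF_RX_QUEUES (8)
    hw.vmx.mq_disable="0"           # set to 1 to force single-queue operation
    hw.pci.honor_msi_blacklist="0"  # allow MSIX under ESXi; see the comment
                                    # in vmxnet3_alloc_rxtx_queues() below

The driver's existing vmxnet3_tunable_int() helper appears to consult a per-unit hw.vmx.N.<knob> value before falling back to these globals, so individual interfaces can presumably be tuned independently.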
@@ -461,14 +513,26 @@ vmxnet3_check_version(struct vmxnet3_softc *sc)
 static void
 vmxnet3_initial_config(struct vmxnet3_softc *sc)
 {
-   int ndesc;
+   int nqueue, ndesc;

-   /*
-    * BMV Much of the work is already done, but this driver does
-    * not support multiqueue yet.
-    */
-   sc->vmx_ntxqueues = VMXNET3_TX_QUEUES;
-   sc->vmx_nrxqueues = VMXNET3_RX_QUEUES;
+   nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
+   if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
+       nqueue = VMXNET3_DEF_TX_QUEUES;
+   if (nqueue > mp_ncpus)
+       nqueue = mp_ncpus;
+   sc->vmx_max_ntxqueues = nqueue;
+
+   nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
+   if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
+       nqueue = VMXNET3_DEF_RX_QUEUES;
+   if (nqueue > mp_ncpus)
+       nqueue = mp_ncpus;
+   sc->vmx_max_nrxqueues = nqueue;
+
+   if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
+       sc->vmx_max_nrxqueues = 1;
+       sc->vmx_max_ntxqueues = 1;
+   }

    ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
    if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
@@ -486,6 +550,27 @@ vmxnet3_initial_config(struct vmxnet3_softc *sc)
    sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
 }

+static void
+vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
+{
+
+   if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
+       goto out;
+
+   /* BMV: Just use the maximum configured for now. */
+   sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
+   sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
+
+   if (sc->vmx_nrxqueues > 1)
+       sc->vmx_flags |= VMXNET3_FLAG_RSS;
+
+   return;
+
+out:
+   sc->vmx_ntxqueues = 1;
+   sc->vmx_nrxqueues = 1;
+}
+
 static int
 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
 {
@@ -498,7 +583,7 @@ vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
        return (1);

    /* Allocate an additional vector for the events interrupt. */
-   required = sc->vmx_nrxqueues + sc->vmx_ntxqueues + 1;
+   required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;

    nmsix = pci_msix_count(dev);
    if (nmsix < required)
@@ -511,6 +596,8 @@ vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
    } else
        pci_release_msi(dev);

+   /* BMV TODO Fallback to sharing MSIX vectors if possible. */
+
    return (1);
 }
@@ -584,10 +671,6 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
    return (0);
 }

-/*
- * NOTE: We only support the simple case of each Rx and Tx queue on its
- * own MSIX vector. This is good enough until we support mulitqueue.
- */
 static int
 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
 {
@@ -649,10 +732,6 @@ vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
    return (error);
 }

-/*
- * XXX BMV Should probably reorganize the attach and just do
- * this in vmxnet3_init_shared_data().
- */
 static void
 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
 {
@@ -782,6 +861,71 @@ vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
        pci_release_msi(sc->vmx_dev);
 }

+#ifndef VMXNET3_LEGACY_TX
+static int
+vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
+{
+   device_t dev;
+
+   dev = sc->vmx_dev;
+
+   sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
+       taskqueue_thread_enqueue, &sc->vmx_tq);
+   if (sc->vmx_tq == NULL)
+       return (ENOMEM);
+
+   return (0);
+}
+
+static void
+vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
+{
+   device_t dev;
+   int nthreads, error;
+
+   dev = sc->vmx_dev;
+
+   /*
+    * The taskqueue is typically not frequently used, so a dedicated
+    * thread for each queue is unnecessary.
+    */
+   nthreads = MAX(1, sc->vmx_ntxqueues / 2);
+
+   /*
+    * Most drivers just ignore the return value - it only fails
+    * with ENOMEM so an error is not likely. It is hard for us
+    * to recover from an error here.
+    */
+   error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
+       "%s taskq", device_get_nameunit(dev));
+   if (error)
+       device_printf(dev, "failed to start taskqueue: %d", error);
+}
+
+static void
+vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
+{
+   struct vmxnet3_txqueue *txq;
+   int i;
+
+   if (sc->vmx_tq != NULL) {
+       for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
+           txq = &sc->vmx_txq[i];
+           taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
+       }
+   }
+}
+
+static void
+vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
+{
+   if (sc->vmx_tq != NULL) {
+       taskqueue_free(sc->vmx_tq);
+       sc->vmx_tq = NULL;
+   }
+}
+#endif
+
 static int
 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
 {
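For readers unfamiliar with the taskqueue(9) pattern used by the four functions above, the sketch below condenses it to a single task; the my_* names are hypothetical, not driver code. Note that taskqueue_thread_enqueue takes a pointer to the queue pointer itself, which is why &sc->vmx_tq is passed as the context argument at create time.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/kernel.h>
    #include <sys/malloc.h>
    #include <sys/priority.h>
    #include <sys/queue.h>
    #include <sys/taskqueue.h>

    struct my_softc {
        struct taskqueue *tq;
        struct task       deferred;
    };

    static void
    my_deferred(void *arg, int pending)
    {
        /* Runs in the taskqueue thread; may block, unlike an ithread. */
    }

    static int
    my_tq_attach(struct my_softc *sc)
    {
        TASK_INIT(&sc->deferred, 0, my_deferred, sc);
        sc->tq = taskqueue_create("my tq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL)
            return (ENOMEM);
        /* One service thread at network priority. */
        return (taskqueue_start_threads(&sc->tq, 1, PI_NET, "my taskq"));
    }

    static void
    my_tq_detach(struct my_softc *sc)
    {
        if (sc->tq != NULL) {
            /* Wait for any queued instance of the task to finish... */
            taskqueue_drain(sc->tq, &sc->deferred);
            /* ...then reap the threads and free the queue. */
            taskqueue_free(sc->tq);
            sc->tq = NULL;
        }
    }

Work is later submitted with taskqueue_enqueue(sc->tq, &sc->deferred), which is exactly what vmxnet3_txq_mq_start() does below when it loses the race for the queue lock.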
@@ -837,6 +981,15 @@ vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
    txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;

+#ifndef VMXNET3_LEGACY_TX
+   TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
+
+   txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
+       M_NOWAIT, &txq->vxtxq_mtx);
+   if (txq->vxtxq_br == NULL)
+       return (ENOMEM);
+#endif
+
    return (0);
 }
@@ -845,20 +998,32 @@ vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
 {
    int i, error;

+   /*
+    * Only attempt to create multiple queues if MSIX is available. MSIX is
+    * disabled by default because its apparently broken for devices passed
+    * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
+    * must be set to zero for MSIX. This check prevents us from allocating
+    * queue structures that we will not use.
+    */
+   if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
+       sc->vmx_max_nrxqueues = 1;
+       sc->vmx_max_ntxqueues = 1;
+   }
+
    sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
-       sc->vmx_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
+       sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
    sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
-       sc->vmx_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
+       sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
    if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
        return (ENOMEM);

-   for (i = 0; i < sc->vmx_nrxqueues; i++) {
+   for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
        error = vmxnet3_init_rxq(sc, i);
        if (error)
            return (error);
    }

-   for (i = 0; i < sc->vmx_ntxqueues; i++) {
+   for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
        error = vmxnet3_init_txq(sc, i);
        if (error)
            return (error);
@@ -899,6 +1064,13 @@ vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
    txq->vxtxq_sc = NULL;
    txq->vxtxq_id = -1;

+#ifndef VMXNET3_LEGACY_TX
+   if (txq->vxtxq_br != NULL) {
+       buf_ring_free(txq->vxtxq_br, M_DEVBUF);
+       txq->vxtxq_br = NULL;
+   }
+#endif
+
    if (txr->vxtxr_txbuf != NULL) {
        free(txr->vxtxr_txbuf, M_DEVBUF);
        txr->vxtxr_txbuf = NULL;
@@ -914,14 +1086,14 @@ vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
    int i;

    if (sc->vmx_rxq != NULL) {
-       for (i = 0; i < sc->vmx_nrxqueues; i++)
+       for (i = 0; i < sc->vmx_max_nrxqueues; i++)
            vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
        free(sc->vmx_rxq, M_DEVBUF);
        sc->vmx_rxq = NULL;
    }

    if (sc->vmx_txq != NULL) {
-       for (i = 0; i < sc->vmx_ntxqueues; i++)
+       for (i = 0; i < sc->vmx_max_ntxqueues; i++)
            vmxnet3_destroy_txq(&sc->vmx_txq[i]);
        free(sc->vmx_txq, M_DEVBUF);
        sc->vmx_txq = NULL;
@@ -965,6 +1137,17 @@ vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
        kva += sizeof(struct vmxnet3_rxq_shared);
    }

+   if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
+       size = sizeof(struct vmxnet3_rss_shared);
+       error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
+       if (error) {
+           device_printf(dev, "cannot alloc rss shared memory\n");
+           return (error);
+       }
+       sc->vmx_rss =
+           (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
+   }
+
    return (0);
 }
@@ -972,6 +1155,11 @@ static void
 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
 {

+   if (sc->vmx_rss != NULL) {
+       vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
+       sc->vmx_rss = NULL;
+   }
+
    if (sc->vmx_qs != NULL) {
        vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
        sc->vmx_qs = NULL;
@@ -1008,7 +1196,7 @@ vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
-       VMXNET3_TSO_MAXSIZE,    /* maxsize */
+       VMXNET3_TX_MAXSIZE,     /* maxsize */
        VMXNET3_TX_MAXSEGS,     /* nsegments */
        VMXNET3_TX_MAXSEGSIZE,  /* maxsegsize */
        0,                      /* flags */
@@ -1333,6 +1521,13 @@ vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
    ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
    ds->nrxsg_max = sc->vmx_max_rxsegs;

+   /* RSS conf */
+   if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
+       ds->rss.version = 1;
+       ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
+       ds->rss.len = sc->vmx_rss_dma.dma_size;
+   }
+
    /* Interrupt control. */
    ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
    ds->nintr = sc->vmx_nintrs;
@@ -1392,9 +1587,43 @@ vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
    if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
        ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
    if (ifp->if_capenable & IFCAP_TSO4)
-       ifp->if_hwassist |= CSUM_TSO;
+       ifp->if_hwassist |= CSUM_IP_TSO;
    if (ifp->if_capenable & IFCAP_TSO6)
-       ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
+       ifp->if_hwassist |= CSUM_IP6_TSO;
+}
+
+static void
+vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
+{
+   /*
+    * Use the same key as the Linux driver until FreeBSD can do
+    * RSS (presumably Toeplitz) in software.
+    */
+   static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
+       0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
+       0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
+       0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
+       0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
+       0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
+   };
+
+   struct vmxnet3_driver_shared *ds;
+   struct vmxnet3_rss_shared *rss;
+   int i;
+
+   ds = sc->vmx_ds;
+   rss = sc->vmx_rss;
+
+   rss->hash_type =
+       UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
+       UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
+   rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
+   rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
+   rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
+   memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
+
+   for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
+       rss->ind_table[i] = i % sc->vmx_nrxqueues;
 }

 static void
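How the indirection table filled in above spreads load, in a user-space sketch. The assumed device behavior (not spelled out in this commit) is that vmxnet3 computes a Toeplitz hash over the flow's addresses and ports and indexes the table with that hash modulo the table size:

    #include <stdint.h>

    #define IND_TABLE_SIZE 128      /* UPT1_RSS_MAX_IND_TABLE_SIZE */

    /* Queue an incoming flow with the given RSS hash is steered to. */
    static int
    rss_queue_for_hash(const uint8_t ind_table[IND_TABLE_SIZE], uint32_t hash)
    {
        return (ind_table[hash % IND_TABLE_SIZE]);
    }

With the round-robin fill above (ind_table[i] = i % nrxqueues), a hash h lands on queue (h % 128) % nrxqueues. That is perfectly uniform whenever nrxqueues divides 128 — any power of two up to the driver's maximum of 8 — and only slightly biased otherwise.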
@@ -1406,6 +1635,10 @@ vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
    ifp = sc->vmx_ifp;
    ds = sc->vmx_ds;

+   ds->mtu = ifp->if_mtu;
+   ds->ntxqueue = sc->vmx_ntxqueues;
+   ds->nrxqueue = sc->vmx_nrxqueues;
+
    ds->upt_features = 0;
    if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
        ds->upt_features |= UPT1_F_CSUM;
@@ -1414,9 +1647,10 @@ vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
    if (ifp->if_capenable & IFCAP_LRO)
        ds->upt_features |= UPT1_F_LRO;

-   ds->mtu = ifp->if_mtu;
-   ds->ntxqueue = sc->vmx_ntxqueues;
-   ds->nrxqueue = sc->vmx_nrxqueues;
+   if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
+       ds->upt_features |= UPT1_F_RSS;
+       vmxnet3_reinit_rss_shared_data(sc);
+   }

    vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
    vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
@@ -1471,17 +1705,26 @@ vmxnet3_setup_interface(struct vmxnet3_softc *sc)
    if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 #if __FreeBSD_version < 1000025
    ifp->if_baudrate = 1000000000;
-#else
+#elif __FreeBSD_version < 1100011
    if_initbaudrate(ifp, IF_Gbps(10));
+#else
+   ifp->if_baudrate = IF_Gbps(10);
 #endif
    ifp->if_softc = sc;
    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    ifp->if_init = vmxnet3_init;
    ifp->if_ioctl = vmxnet3_ioctl;
+   ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE;
+
+#ifdef VMXNET3_LEGACY_TX
    ifp->if_start = vmxnet3_start;
    ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
    IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
    IFQ_SET_READY(&ifp->if_snd);
+#else
+   ifp->if_transmit = vmxnet3_txq_mq_start;
+   ifp->if_qflush = vmxnet3_qflush;
+#endif

    vmxnet3_get_lladdr(sc);
    ether_ifattach(ifp, sc->vmx_lladdr);
@@ -1529,8 +1772,11 @@ vmxnet3_evintr(struct vmxnet3_softc *sc)
    event = sc->vmx_ds->event;
    vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);

-   if (event & VMXNET3_EVENT_LINK)
+   if (event & VMXNET3_EVENT_LINK) {
        vmxnet3_link_status(sc);
+       if (sc->vmx_link_active != 0)
+           vmxnet3_tx_start_all(sc);
+   }

    if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
        reset = 1;
@@ -1566,6 +1812,7 @@ vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
    struct vmxnet3_comp_ring *txc;
    struct vmxnet3_txcompdesc *txcd;
    struct vmxnet3_txbuf *txb;
+   struct mbuf *m;
    u_int sop;

    sc = txq->vxtxq_sc;
@@ -1589,15 +1836,18 @@ vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
        sop = txr->vxtxr_next;
        txb = &txr->vxtxr_txbuf[sop];

-       if (txb->vtxb_m != NULL) {
+       if ((m = txb->vtxb_m) != NULL) {
            bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
                BUS_DMASYNC_POSTWRITE);
            bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);

-           m_freem(txb->vtxb_m);
-           txb->vtxb_m = NULL;
+           txq->vxtxq_stats.vmtxs_opackets++;
+           txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
+           if (m->m_flags & M_MCAST)
+               txq->vxtxq_stats.vmtxs_omcasts++;

-           ifp->if_opackets++;
+           m_freem(m);
+           txb->vtxb_m = NULL;
        }

        txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
@@ -1771,11 +2021,39 @@ vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
    ifp = sc->vmx_ifp;

    if (rxcd->error) {
-       ifp->if_ierrors++;
+       rxq->vxrxq_stats.vmrxs_ierrors++;
        m_freem(m);
        return;
    }

+#ifdef notyet
+   switch (rxcd->rss_type) {
+   case VMXNET3_RCD_RSS_TYPE_IPV4:
+       m->m_pkthdr.flowid = rxcd->rss_hash;
+       M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
+       break;
+   case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+       m->m_pkthdr.flowid = rxcd->rss_hash;
+       M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
+       break;
+   case VMXNET3_RCD_RSS_TYPE_IPV6:
+       m->m_pkthdr.flowid = rxcd->rss_hash;
+       M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
+       break;
+   case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+       m->m_pkthdr.flowid = rxcd->rss_hash;
+       M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
+       break;
+   default: /* VMXNET3_RCD_RSS_TYPE_NONE */
+       m->m_pkthdr.flowid = rxq->vxrxq_id;
+       M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+       break;
+   }
+#else
+   m->m_pkthdr.flowid = rxq->vxrxq_id;
+   m->m_flags |= M_FLOWID;
+#endif
+
    if (!rxcd->no_csum)
        vmxnet3_rx_csum(rxcd, m);
    if (rxcd->vlan) {
@@ -1783,7 +2061,9 @@ vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
        m->m_pkthdr.ether_vtag = rxcd->vtag;
    }

-   ifp->if_ipackets++;
+   rxq->vxrxq_stats.vmrxs_ipackets++;
+   rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
+
    VMXNET3_RXQ_UNLOCK(rxq);
    (*ifp->if_input)(ifp, m);
    VMXNET3_RXQ_LOCK(rxq);
@@ -1865,7 +2145,7 @@ vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
            }

            if (vmxnet3_newbuf(sc, rxr) != 0) {
-               ifp->if_iqdrops++;
+               rxq->vxrxq_stats.vmrxs_iqdrops++;
                vmxnet3_rxq_eof_discard(rxq, rxr, idx);
                if (!rxcd->eop)
                    vmxnet3_rxq_discard_chain(rxq);
@@ -1884,7 +2164,7 @@ vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
                ("%s: frame not started?", __func__));

            if (vmxnet3_newbuf(sc, rxr) != 0) {
-               ifp->if_iqdrops++;
+               rxq->vxrxq_stats.vmrxs_iqdrops++;
                vmxnet3_rxq_eof_discard(rxq, rxr, idx);
                if (!rxcd->eop)
                    vmxnet3_rxq_discard_chain(rxq);
@@ -1953,8 +2233,7 @@ vmxnet3_legacy_intr(void *xsc)
    VMXNET3_TXQ_LOCK(txq);
    vmxnet3_txq_eof(txq);
-   if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
-       vmxnet3_start_locked(ifp);
+   vmxnet3_txq_start(txq);
    VMXNET3_TXQ_UNLOCK(txq);

    vmxnet3_enable_all_intrs(sc);
@@ -1976,8 +2255,7 @@ vmxnet3_txq_intr(void *xtxq)
    VMXNET3_TXQ_LOCK(txq);
    vmxnet3_txq_eof(txq);
-   if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
-       vmxnet3_start_locked(ifp);
+   vmxnet3_txq_start(txq);
    VMXNET3_TXQ_UNLOCK(txq);

    vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
@@ -2056,6 +2334,7 @@ vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
        if (rxb->vrxb_m == NULL)
            continue;
+
        bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
            BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
@@ -2318,7 +2597,8 @@ vmxnet3_init(void *xsc)
  * the mbuf packet header. Bug andre@.
  */
 static int
-vmxnet3_txq_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
+vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
+    int *etype, int *proto, int *start)
 {
    struct ether_vlan_header *evh;
    int offset;
@@ -2342,7 +2622,7 @@ vmxnet3_txq_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
                (caddr_t) &iphdr);
            ip = &iphdr;
        } else
-           ip = (struct ip *)(m->m_data + offset);
+           ip = mtodo(m, offset);
        *proto = ip->ip_p;
        *start = offset + (ip->ip_hl << 2);
        break;
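The mtodo() conversions above replace open-coded pointer arithmetic; the header walk that vmxnet3_txq_offload_ctx() performs before that point can be summarized by this compressed sketch (a hypothetical helper, assuming the Ethernet header is contiguous in the first mbuf, which the real code verifies):

    #include <sys/param.h>
    #include <sys/mbuf.h>
    #include <net/ethernet.h>
    #include <netinet/in.h>

    /* Return the byte offset of the L3 header and report its ethertype. */
    static int
    l3_offset(struct mbuf *m, uint16_t *etype)
    {
        struct ether_vlan_header *evh;

        evh = mtod(m, struct ether_vlan_header *);
        if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
            /* 802.1Q tag present: 14-byte header plus 4-byte tag. */
            *etype = ntohs(evh->evl_proto);
            return (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
        }
        *etype = ntohs(evh->evl_encap_proto);
        return (ETHER_HDR_LEN);
    }

For IPv4 the L4 offset then adds ip_hl << 2 to this value, which is exactly what *start receives in the hunk above.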
@@ -2370,19 +2650,21 @@
            return (EINVAL);
        }

-       if (m->m_len < *start + sizeof(struct tcphdr)) {
-           m_copydata(m, offset, sizeof(struct tcphdr),
-               (caddr_t) &tcphdr);
-           tcp = &tcphdr;
-       } else
-           tcp = (struct tcphdr *)(m->m_data + *start);
+       txq->vxtxq_stats.vmtxs_tso++;
        /*
         * For TSO, the size of the protocol header is also
         * included in the descriptor header size.
         */
+       if (m->m_len < *start + sizeof(struct tcphdr)) {
+           m_copydata(m, offset, sizeof(struct tcphdr),
+               (caddr_t) &tcphdr);
+           tcp = &tcphdr;
+       } else
+           tcp = mtodo(m, *start);
        *start += (tcp->th_off << 2);
-   }
+   } else
+       txq->vxtxq_stats.vmtxs_csum++;

    return (0);
 }
@@ -2394,18 +2676,17 @@ vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
    struct vmxnet3_txring *txr;
    struct mbuf *m;
    bus_dma_tag_t tag;
-   int maxsegs, error;
+   int error;

    txr = &txq->vxtxq_cmd_ring;
    m = *m0;
    tag = txr->vxtxr_txtag;
-   maxsegs = VMXNET3_TX_MAXSEGS;

    error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
    if (error == 0 || error != EFBIG)
        return (error);

-   m = m_collapse(m, M_NOWAIT, maxsegs);
+   m = m_defrag(m, M_NOWAIT);
    if (m != NULL) {
        *m0 = m;
        error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
@@ -2415,8 +2696,9 @@ vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
    if (error) {
        m_freem(*m0);
        *m0 = NULL;
+       txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
    } else
-       txq->vxtxq_sc->vmx_stats.vmst_collapsed++;
+       txq->vxtxq_sc->vmx_stats.vmst_defragged++;

    return (error);
 }
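The m_collapse() to m_defrag() switch is the behavioral change in the hunk above: m_collapse() only coalesces an existing chain in place with minimal copying, while m_defrag() copies the whole chain into as few fresh clusters as possible, so it can always reach the 32-segment limit when memory allows. The retry idiom in miniature (a hypothetical helper, not driver code):

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/bus.h>
    #include <sys/mbuf.h>
    #include <machine/bus.h>

    static int
    load_tx_mbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mp,
        bus_dma_segment_t *segs, int *nsegs)
    {
        struct mbuf *m;
        int error;

        error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs, nsegs, 0);
        if (error != EFBIG)             /* success, or a hard failure */
            return (error);

        /* Too many segments: copy into as few clusters as possible. */
        m = m_defrag(*mp, M_NOWAIT);
        if (m == NULL) {
            m_freem(*mp);
            *mp = NULL;
            return (ENOBUFS);
        }
        *mp = m;
        return (bus_dmamap_load_mbuf_sg(tag, map, m, segs, nsegs, 0));
    }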
@@ -2459,13 +2741,13 @@
        ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));

    if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
-       txq->vxtxq_stats.vtxrs_full++;
+       txq->vxtxq_stats.vmtxs_full++;
        vmxnet3_txq_unload_mbuf(txq, dmap);
        return (ENOSPC);
    } else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
-       error = vmxnet3_txq_offload_ctx(m, &etype, &proto, &start);
+       error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
        if (error) {
-           txq->vxtxq_stats.vtxrs_offload_failed++;
+           txq->vxtxq_stats.vmtxs_offload_failed++;
            vmxnet3_txq_unload_mbuf(txq, dmap);
            m_freem(m);
            *m0 = NULL;
@@ -2531,6 +2813,22 @@ vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
 }

 static void
+vmxnet3_txq_update_pending(struct vmxnet3_txqueue *txq)
+{
+   struct vmxnet3_txring *txr;
+
+   txr = &txq->vxtxq_cmd_ring;
+
+   if (txq->vxtxq_ts->npending > 0) {
+       txq->vxtxq_ts->npending = 0;
+       vmxnet3_write_bar0(txq->vxtxq_sc,
+           VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head);
+   }
+}
+
+#ifdef VMXNET3_LEGACY_TX
+
+static void
 vmxnet3_start_locked(struct ifnet *ifp)
 {
    struct vmxnet3_softc *sc;
@@ -2575,11 +2873,7 @@
    }

    if (tx > 0) {
-       if (txq->vxtxq_ts->npending > 0) {
-           txq->vxtxq_ts->npending = 0;
-           vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
-               txr->vxtxr_head);
-       }
+       vmxnet3_txq_update_pending(txq);
        txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
    }
 }
@@ -2598,6 +2892,153 @@ vmxnet3_start(struct ifnet *ifp)
    VMXNET3_TXQ_UNLOCK(txq);
 }

+#else /* !VMXNET3_LEGACY_TX */
+
+static int
+vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
+{
+   struct vmxnet3_softc *sc;
+   struct vmxnet3_txring *txr;
+   struct buf_ring *br;
+   struct ifnet *ifp;
+   int tx, avail, error;
+
+   sc = txq->vxtxq_sc;
+   br = txq->vxtxq_br;
+   ifp = sc->vmx_ifp;
+   txr = &txq->vxtxq_cmd_ring;
+   tx = 0;
+   error = 0;
+
+   VMXNET3_TXQ_LOCK_ASSERT(txq);
+
+   if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+       sc->vmx_link_active == 0) {
+       if (m != NULL)
+           error = drbr_enqueue(ifp, br, m);
+       return (error);
+   }
+
+   if (m != NULL) {
+       error = drbr_enqueue(ifp, br, m);
+       if (error)
+           return (error);
+   }
+
+   while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
+       m = drbr_peek(ifp, br);
+       if (m == NULL)
+           break;
+
+       /* Assume worse case if this mbuf is the head of a chain. */
+       if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
+           drbr_putback(ifp, br, m);
+           error = ENOBUFS;
+           break;
+       }
+
+       error = vmxnet3_txq_encap(txq, &m);
+       if (error) {
+           if (m != NULL)
+               drbr_putback(ifp, br, m);
+           else
+               drbr_advance(ifp, br);
+           break;
+       }
+       drbr_advance(ifp, br);
+
+       tx++;
+       ETHER_BPF_MTAP(ifp, m);
+   }
+
+   if (tx > 0) {
+       vmxnet3_txq_update_pending(txq);
+       txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
+   }
+
+   return (error);
+}
+
+static int
+vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
+{
+   struct vmxnet3_softc *sc;
+   struct vmxnet3_txqueue *txq;
+   int i, ntxq, error;
+
+   sc = ifp->if_softc;
+   ntxq = sc->vmx_ntxqueues;
+
+   if (m->m_flags & M_FLOWID)
+       i = m->m_pkthdr.flowid % ntxq;
+   else
+       i = curcpu % ntxq;
+
+   txq = &sc->vmx_txq[i];
+
+   if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
+       error = vmxnet3_txq_mq_start_locked(txq, m);
+       VMXNET3_TXQ_UNLOCK(txq);
+   } else {
+       error = drbr_enqueue(ifp, txq->vxtxq_br, m);
+       taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
+   }
+
+   return (error);
+}
+
+static void
+vmxnet3_txq_tq_deferred(void *xtxq, int pending)
+{
+   struct vmxnet3_softc *sc;
+   struct vmxnet3_txqueue *txq;
+
+   txq = xtxq;
+   sc = txq->vxtxq_sc;
+
+   VMXNET3_TXQ_LOCK(txq);
+   if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
+       vmxnet3_txq_mq_start_locked(txq, NULL);
+   VMXNET3_TXQ_UNLOCK(txq);
+}
+
+#endif /* VMXNET3_LEGACY_TX */
+
+static void
+vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
+{
+   struct vmxnet3_softc *sc;
+   struct ifnet *ifp;
+
+   sc = txq->vxtxq_sc;
+   ifp = sc->vmx_ifp;
+
+#ifdef VMXNET3_LEGACY_TX
+   if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+       vmxnet3_start_locked(ifp);
+#else
+   if (!drbr_empty(ifp, txq->vxtxq_br))
+       vmxnet3_txq_mq_start_locked(txq, NULL);
+#endif
+}
+
+static void
+vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
+{
+   struct vmxnet3_txqueue *txq;
+   int i;
+
+   VMXNET3_CORE_LOCK_ASSERT(sc);
+
+   for (i = 0; i < sc->vmx_ntxqueues; i++) {
+       txq = &sc->vmx_txq[i];
+
+       VMXNET3_TXQ_LOCK(txq);
+       vmxnet3_txq_start(txq);
+       VMXNET3_TXQ_UNLOCK(txq);
+   }
+}
+
 static void
 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
 {
@@ -2657,9 +3098,7 @@ vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
    ifp = sc->vmx_ifp;
    ds = sc->vmx_ds;

-   mode = VMXNET3_RXMODE_UCAST;
-   if (ifp->if_flags & IFF_BROADCAST)
-       mode |= VMXNET3_RXMODE_BCAST;
+   mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
    if (ifp->if_flags & IFF_PROMISC)
        mode |= VMXNET3_RXMODE_PROMISC;
    if (ifp->if_flags & IFF_ALLMULTI)
@@ -2820,6 +3259,30 @@ vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
    return (error);
 }

+#ifndef VMXNET3_LEGACY_TX
+static void
+vmxnet3_qflush(struct ifnet *ifp)
+{
+   struct vmxnet3_softc *sc;
+   struct vmxnet3_txqueue *txq;
+   struct mbuf *m;
+   int i;
+
+   sc = ifp->if_softc;
+
+   for (i = 0; i < sc->vmx_ntxqueues; i++) {
+       txq = &sc->vmx_txq[i];
+
+       VMXNET3_TXQ_LOCK(txq);
+       while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
+           m_freem(m);
+       VMXNET3_TXQ_UNLOCK(txq);
+   }
+
+   if_qflush(ifp);
+}
+#endif
+
 static int
 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
 {
@@ -2840,13 +3303,80 @@
 }

 static void
-vmxnet3_refresh_stats(struct vmxnet3_softc *sc)
+vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
 {

    vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
 }

 static void
+vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
+    struct vmxnet3_txq_stats *accum)
+{
+   struct vmxnet3_txq_stats *st;
+
+   st = &txq->vxtxq_stats;
+
+   accum->vmtxs_opackets += st->vmtxs_opackets;
+   accum->vmtxs_obytes += st->vmtxs_obytes;
+   accum->vmtxs_omcasts += st->vmtxs_omcasts;
+   accum->vmtxs_csum += st->vmtxs_csum;
+   accum->vmtxs_tso += st->vmtxs_tso;
+   accum->vmtxs_full += st->vmtxs_full;
+   accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
+}
+
+static void
+vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
+    struct vmxnet3_rxq_stats *accum)
+{
+   struct vmxnet3_rxq_stats *st;
+
+   st = &rxq->vxrxq_stats;
+
+   accum->vmrxs_ipackets += st->vmrxs_ipackets;
+   accum->vmrxs_ibytes += st->vmrxs_ibytes;
+   accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
+   accum->vmrxs_ierrors += st->vmrxs_ierrors;
+}
+
+static void
+vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
+{
+   struct ifnet *ifp;
+   struct vmxnet3_statistics *st;
+   struct vmxnet3_txq_stats txaccum;
+   struct vmxnet3_rxq_stats rxaccum;
+   int i;
+
+   ifp = sc->vmx_ifp;
+   st = &sc->vmx_stats;
+
+   bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
+   bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
+
+   for (i = 0; i < sc->vmx_ntxqueues; i++)
+       vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
+   for (i = 0; i < sc->vmx_nrxqueues; i++)
+       vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
+
+   /*
+    * With the exception of if_ierrors, these ifnet statistics are
+    * only updated in the driver, so just set them to our accumulated
+    * values. if_ierrors is updated in ether_input() for malformed
+    * frames that we should have already discarded.
+    */
+   ifp->if_ipackets = rxaccum.vmrxs_ipackets;
+   ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
+   ifp->if_ierrors = rxaccum.vmrxs_ierrors;
+   ifp->if_opackets = txaccum.vmtxs_opackets;
+#ifndef VMXNET3_LEGACY_TX
+   ifp->if_obytes = txaccum.vmtxs_obytes;
+   ifp->if_omcasts = txaccum.vmtxs_omcasts;
+#endif
+}
+
+static void
 vmxnet3_tick(void *xsc)
 {
    struct vmxnet3_softc *sc;
@@ -2858,7 +3388,9 @@ vmxnet3_tick(void *xsc)
    timedout = 0;

    VMXNET3_CORE_LOCK_ASSERT(sc);
-   vmxnet3_refresh_stats(sc);
+
+   vmxnet3_accumulate_stats(sc);
+   vmxnet3_refresh_host_stats(sc);

    for (i = 0; i < sc->vmx_ntxqueues; i++)
        timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
@@ -2975,10 +3507,20 @@ vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
        NULL, "Transmit Queue");
    txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);

+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
+       &stats->vmtxs_opackets, "Transmit packets");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
+       &stats->vmtxs_obytes, "Transmit bytes");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
+       &stats->vmtxs_omcasts, "Transmit multicasts");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+       &stats->vmtxs_csum, "Transmit checksum offloaded");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
+       &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
    SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
-       &stats->vtxrs_full, "Tx ring full");
+       &stats->vmtxs_full, "Transmit ring full");
    SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
-       &stats->vtxrs_offload_failed, "Tx checksum offload failed");
+       &stats->vmtxs_offload_failed, "Transmit checksum offload failed");

    /*
     * Add statistics reported by the host. These are updated once
     * per second.
@@ -3023,6 +3565,15 @@ vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
        NULL, "Receive Queue");
    rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);

+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
+       &stats->vmrxs_ipackets, "Receive packets");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
+       &stats->vmrxs_ibytes, "Receive bytes");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
+       &stats->vmrxs_iqdrops, "Receive drops");
+   SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
+       &stats->vmrxs_ierrors, "Receive errors");
+
    /*
     * Add statistics reported by the host. These are updated once
     * per second.
@@ -3052,7 +3603,6 @@ vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
        &rxstats->error, "Errors");
 }

-#ifdef VMXNET3_DEBUG_SYSCTL
 static void
 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
@@ -3111,7 +3661,6 @@ vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
            &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
    }
 }
-#endif

 static void
 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
@@ -3124,9 +3673,7 @@
    for (i = 0; i < sc->vmx_nrxqueues; i++)
        vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);

-#ifdef VMXNET3_DEBUG_SYSCTL
    vmxnet3_setup_debug_sysctl(sc, ctx, child);
-#endif
 }

 static void
@@ -3143,14 +3690,21 @@ vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
    tree = device_get_sysctl_tree(dev);
    child = SYSCTL_CHILDREN(tree);

+   SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
+       &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
+   SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
+       &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
    SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
        &sc->vmx_ntxqueues, 0, "Number of Tx queues");
    SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
        &sc->vmx_nrxqueues, 0, "Number of Rx queues");

    stats = &sc->vmx_stats;
-   SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "collapsed", CTLFLAG_RD,
-       &stats->vmst_collapsed, 0, "Tx mbuf chains collapsed");
+   SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
+       &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
+   SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
+       &stats->vmst_defrag_failed, 0,
+       "Tx mbuf dropped because defrag failed");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
        &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
        &stats->vmst_mbuf_load_failed, 0,
diff --git a/sys/dev/vmware/vmxnet3/if_vmxreg.h b/sys/dev/vmware/vmxnet3/if_vmxreg.h
index 8b554b5..5e22920 100644
--- a/sys/dev/vmware/vmxnet3/if_vmxreg.h
+++ b/sys/dev/vmware/vmxnet3/if_vmxreg.h
@@ -170,6 +170,12 @@ struct vmxnet3_rxcompdesc {
    uint32_t    gen:1;
 } __packed;

+#define VMXNET3_RCD_RSS_TYPE_NONE      0
+#define VMXNET3_RCD_RSS_TYPE_IPV4      1
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV4   2
+#define VMXNET3_RCD_RSS_TYPE_IPV6      3
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV6   4
+
 #define VMXNET3_REV1_MAGIC 0XBABEFEE1

 #define VMXNET3_GOS_UNKNOWN    0x00
@@ -313,4 +319,25 @@ struct vmxnet3_rxq_shared {
    uint8_t     pad4[88];
 } __packed;

+#define UPT1_RSS_HASH_TYPE_NONE        0x00
+#define UPT1_RSS_HASH_TYPE_IPV4        0x01
+#define UPT1_RSS_HASH_TYPE_TCP_IPV4    0x02
+#define UPT1_RSS_HASH_TYPE_IPV6        0x04
+#define UPT1_RSS_HASH_TYPE_TCP_IPV6    0x08
+
+#define UPT1_RSS_HASH_FUNC_NONE        0x00
+#define UPT1_RSS_HASH_FUNC_TOEPLITZ    0x01
+
+#define UPT1_RSS_MAX_KEY_SIZE          40
+#define UPT1_RSS_MAX_IND_TABLE_SIZE    128
+
+struct vmxnet3_rss_shared {
+   uint16_t    hash_type;
+   uint16_t    hash_func;
+   uint16_t    hash_key_size;
+   uint16_t    ind_table_size;
+   uint8_t     hash_key[UPT1_RSS_MAX_KEY_SIZE];
+   uint8_t     ind_table[UPT1_RSS_MAX_IND_TABLE_SIZE];
+} __packed;
+
 #endif /* _IF_VMXREG_H */
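The new shared structure is read directly by the device, so its layout must not drift. A compile-time check one might keep alongside it (hypothetical, not part of the commit; the arithmetic is four uint16_t fields = 8 bytes, plus the 40-byte key and the 128-entry table, with __packed forbidding padding):

    #include <sys/param.h>
    #include <sys/systm.h>

    /* 8 + 40 + 128 = 176 bytes. */
    CTASSERT(sizeof(struct vmxnet3_rss_shared) == 176);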
diff --git a/sys/dev/vmware/vmxnet3/if_vmxvar.h b/sys/dev/vmware/vmxnet3/if_vmxvar.h
index c905eb8..50eb1c3 100644
--- a/sys/dev/vmware/vmxnet3/if_vmxvar.h
+++ b/sys/dev/vmware/vmxnet3/if_vmxvar.h
@@ -31,10 +31,10 @@ struct vmxnet3_dma_alloc {
 };

 /*
- * The number of Rx/Tx queues this driver supports.
+ * The number of Rx/Tx queues this driver prefers.
  */
-#define VMXNET3_RX_QUEUES  1
-#define VMXNET3_TX_QUEUES  1
+#define VMXNET3_DEF_RX_QUEUES  8
+#define VMXNET3_DEF_TX_QUEUES  8

 /*
  * The number of Rx rings in each Rx queue.
@@ -119,13 +119,21 @@ struct vmxnet3_comp_ring {
 };

 struct vmxnet3_txq_stats {
-   uint64_t    vtxrs_full;
-   uint64_t    vtxrs_offload_failed;
+   uint64_t    vmtxs_opackets; /* if_opackets */
+   uint64_t    vmtxs_obytes;   /* if_obytes */
+   uint64_t    vmtxs_omcasts;  /* if_omcasts */
+   uint64_t    vmtxs_csum;
+   uint64_t    vmtxs_tso;
+   uint64_t    vmtxs_full;
+   uint64_t    vmtxs_offload_failed;
 };

 struct vmxnet3_txqueue {
    struct mtx           vxtxq_mtx;
    struct vmxnet3_softc    *vxtxq_sc;
+#ifndef VMXNET3_LEGACY_TX
+   struct buf_ring     *vxtxq_br;
+#endif
    int          vxtxq_id;
    int          vxtxq_intr_idx;
    int          vxtxq_watchdog;
@@ -134,8 +142,11 @@ struct vmxnet3_txqueue {
    struct vmxnet3_txq_stats     vxtxq_stats;
    struct vmxnet3_txq_shared   *vxtxq_ts;
    struct sysctl_oid_list      *vxtxq_sysctl;
+#ifndef VMXNET3_LEGACY_TX
+   struct task          vxtxq_defrtask;
+#endif
    char             vxtxq_name[16];
-};
+} __aligned(CACHE_LINE_SIZE);

 #define VMXNET3_TXQ_LOCK(_txq)     mtx_lock(&(_txq)->vxtxq_mtx)
 #define VMXNET3_TXQ_TRYLOCK(_txq)  mtx_trylock(&(_txq)->vxtxq_mtx)
@@ -146,7 +157,10 @@ struct vmxnet3_txqueue {
    mtx_assert(&(_txq)->vxtxq_mtx, MA_NOTOWNED)

 struct vmxnet3_rxq_stats {
-
+   uint64_t    vmrxs_ipackets; /* if_ipackets */
+   uint64_t    vmrxs_ibytes;   /* if_ibytes */
+   uint64_t    vmrxs_iqdrops;  /* if_iqdrops */
+   uint64_t    vmrxs_ierrors;  /* if_ierrors */
 };

 struct vmxnet3_rxqueue {
@@ -160,7 +174,7 @@ struct vmxnet3_rxqueue {
    struct vmxnet3_rxq_shared   *vxrxq_rs;
    struct sysctl_oid_list      *vxrxq_sysctl;
    char             vxrxq_name[16];
-};
+} __aligned(CACHE_LINE_SIZE);

 #define VMXNET3_RXQ_LOCK(_rxq)     mtx_lock(&(_rxq)->vxrxq_mtx)
 #define VMXNET3_RXQ_UNLOCK(_rxq)   mtx_unlock(&(_rxq)->vxrxq_mtx)
@@ -170,10 +184,10 @@ struct vmxnet3_rxqueue {
    mtx_assert(&(_rxq)->vxrxq_mtx, MA_NOTOWNED)

 struct vmxnet3_statistics {
-   uint32_t    vmst_collapsed;
+   uint32_t    vmst_defragged;
+   uint32_t    vmst_defrag_failed;
    uint32_t    vmst_mgetcl_failed;
    uint32_t    vmst_mbuf_load_failed;
-
 };

 struct vmxnet3_interrupt {
@@ -188,6 +202,7 @@ struct vmxnet3_softc {
    struct vmxnet3_driver_shared    *vmx_ds;
    uint32_t             vmx_flags;
 #define VMXNET3_FLAG_NO_MSIX   0x0001
+#define VMXNET3_FLAG_RSS   0x0002

    struct vmxnet3_rxqueue      *vmx_rxq;
    struct vmxnet3_txqueue      *vmx_txq;
@@ -219,13 +234,20 @@ struct vmxnet3_softc {
    struct vmxnet3_interrupt     vmx_intrs[VMXNET3_MAX_INTRS];

    struct mtx           vmx_mtx;
+#ifndef VMXNET3_LEGACY_TX
+   struct taskqueue        *vmx_tq;
+#endif
    uint8_t             *vmx_mcast;
    void                *vmx_qs;
+   struct vmxnet3_rss_shared   *vmx_rss;
    struct callout           vmx_tick;
    struct vmxnet3_dma_alloc     vmx_ds_dma;
    struct vmxnet3_dma_alloc     vmx_qs_dma;
    struct vmxnet3_dma_alloc     vmx_mcast_dma;
+   struct vmxnet3_dma_alloc     vmx_rss_dma;
    struct ifmedia           vmx_media;
+   int              vmx_max_ntxqueues;
+   int              vmx_max_nrxqueues;
    eventhandler_tag         vmx_vlan_attach;
    eventhandler_tag         vmx_vlan_detach;
    uint32_t             vmx_vlan_filter[4096/32];
@@ -252,7 +274,9 @@ struct vmxnet3_softc {
  * any TSO packets based on the number of segments.
  */
 #define VMXNET3_TX_MAXSEGS     32
-#define VMXNET3_TSO_MAXSIZE        65550
+#define VMXNET3_TX_MAXSIZE     (VMXNET3_TX_MAXSEGS * MCLBYTES)
+#define VMXNET3_TSO_MAXSIZE \
+    (VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header))

 /*
  * Maximum support Tx segments size. The length field in the
@@ -280,6 +304,12 @@ struct vmxnet3_softc {
 #define VMXNET3_WATCHDOG_TIMEOUT   5

 /*
+ * Number of slots in the Tx bufrings. This value matches most other
+ * multiqueue drivers.
+ */
+#define VMXNET3_DEF_BUFRING_SIZE   4096
+
+/*
  * IP protocols that we can perform Tx checksum offloading of.
  */
 #define VMXNET3_CSUM_OFFLOAD   (CSUM_TCP | CSUM_UDP)
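The new size macros derive the DMA and TSO limits instead of hard-coding 65550. With the stock 2 KB mbuf cluster, VMXNET3_TX_MAXSIZE is 32 * 2048 = 65536 bytes, and VMXNET3_TSO_MAXSIZE subtracts the 18-byte ether_vlan_header to give 65518 — inside the ring's segment budget and under the 65535-byte IP length limit advertised through if_hw_tsomax above. A compile-time restatement of that arithmetic (hypothetical check, assuming the default MCLBYTES of 2048):

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <net/ethernet.h>

    CTASSERT(VMXNET3_TX_MAXSIZE == 32 * 2048);
    /* 65536 - 18 = 65518, which is <= the 65535-byte IP maximum. */
    CTASSERT(VMXNET3_TSO_MAXSIZE == 65536 - sizeof(struct ether_vlan_header));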