author     bryanv <bryanv@FreeBSD.org>  2013-09-01 04:33:47 +0000
committer  bryanv <bryanv@FreeBSD.org>  2013-09-01 04:33:47 +0000
commit     c4011595924e79ff02f7f896f09c0b57011ce00d
tree       3e1b266e931116c1d854704fde099d838a45bb58
parent     4174a823010710068b29ea7f013486fc98f3ffbe
Import multiqueue VirtIO net driver from my user/bryanv/vtnetmq branch
This is a significant rewrite of much of the previous driver; it also includes miscellaneous cleanup throughout and support for a few other minor features.
-rw-r--r--  share/man/man4/vtnet.4               |   16
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c    | 3630
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h |  236
-rw-r--r--  sys/modules/virtio/network/Makefile  |   15
4 files changed, 2571 insertions, 1326 deletions
diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4
index 8d4d202..c7b2189 100644
--- a/share/man/man4/vtnet.4
+++ b/share/man/man4/vtnet.4
@@ -69,14 +69,30 @@ prompt before booting the kernel or stored in
.Xr loader.conf 5 .
.Bl -tag -width "xxxxxx"
.It Va hw.vtnet.csum_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .csum_disable
This tunable disables receive and send checksum offload.
The default value is 0.
.It Va hw.vtnet.tso_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .tso_disable
This tunable disables TSO.
The default value is 0.
.It Va hw.vtnet.lro_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .lro_disable
This tunable disables LRO.
The default value is 0.
+.It Va hw.vtnet.mq_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_disable
+This tunable disables multiqueue.
+The default value is 0.
+.It Va hw.vtnet.mq_max_pairs
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_max_pairs
+This tunable sets the maximum number of transmit and receive queue pairs.
+Multiple queues are only supported when the Multiqueue feature is negotiated.
+This driver supports a maximum of 8 queue pairs.
+The number of queue pairs used is the smallest of: the maximum supported
+by the driver and by the hypervisor, the number of CPUs present in the
+guest, and this tunable if not zero (see the sketch after this list).
+The default value is 0.
.El
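
Expressed as code, the selection rule above is a simple clamp over the four
limits. A minimal sketch only, borrowing the driver-side names
(VTNET_MAX_QUEUE_PAIRS, mp_ncpus) used later in this commit;
vtnet_setup_features() performs the real computation:

	pairs = host_max_pairs;                 /* VirtIO config: max_virtqueue_pairs */
	if (pairs > VTNET_MAX_QUEUE_PAIRS)      /* driver limit of 8 */
		pairs = VTNET_MAX_QUEUE_PAIRS;
	if (pairs > mp_ncpus)                   /* CPUs present in the guest */
		pairs = mp_ncpus;
	if (mq_max_pairs != 0 && pairs > mq_max_pairs)
		pairs = mq_max_pairs;           /* this tunable, if not zero */
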
.Sh SEE ALSO
.Xr arp 4 ,
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 89604d1..f757394 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -29,10 +29,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include "opt_device_polling.h"
-#endif
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -46,6 +42,9 @@ __FBSDID("$FreeBSD$");
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/taskqueue.h>
+#include <sys/smp.h>
+#include <machine/smp.h>
#include <vm/uma.h>
@@ -63,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/sctp.h>
@@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$");
#include "virtio_if.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
static int vtnet_modevent(module_t, int, void *);
static int vtnet_probe(device_t);
@@ -87,82 +90,139 @@ static int vtnet_detach(device_t);
static int vtnet_suspend(device_t);
static int vtnet_resume(device_t);
static int vtnet_shutdown(device_t);
+static int vtnet_attach_completed(device_t);
static int vtnet_config_change(device_t);
static void vtnet_negotiate_features(struct vtnet_softc *);
+static void vtnet_setup_features(struct vtnet_softc *);
+static int vtnet_init_rxq(struct vtnet_softc *, int);
+static int vtnet_init_txq(struct vtnet_softc *, int);
+static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
+static void vtnet_free_rxtx_queues(struct vtnet_softc *);
+static int vtnet_alloc_rx_filters(struct vtnet_softc *);
+static void vtnet_free_rx_filters(struct vtnet_softc *);
static int vtnet_alloc_virtqueues(struct vtnet_softc *);
-static void vtnet_get_hwaddr(struct vtnet_softc *);
-static void vtnet_set_hwaddr(struct vtnet_softc *);
-static int vtnet_is_link_up(struct vtnet_softc *);
-static void vtnet_update_link_status(struct vtnet_softc *);
-static void vtnet_watchdog(struct vtnet_softc *);
+static int vtnet_setup_interface(struct vtnet_softc *);
static int vtnet_change_mtu(struct vtnet_softc *, int);
static int vtnet_ioctl(struct ifnet *, u_long, caddr_t);
-static int vtnet_init_rx_vq(struct vtnet_softc *);
-static void vtnet_free_rx_mbufs(struct vtnet_softc *);
-static void vtnet_free_tx_mbufs(struct vtnet_softc *);
-static void vtnet_free_ctrl_vq(struct vtnet_softc *);
-
-#ifdef DEVICE_POLLING
-static poll_handler_t vtnet_poll;
-#endif
-
-static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
- struct mbuf **);
-static int vtnet_replace_rxbuf(struct vtnet_softc *,
+static int vtnet_rxq_populate(struct vtnet_rxq *);
+static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
+static struct mbuf *
+		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
+static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
struct mbuf *, int);
-static int vtnet_newbuf(struct vtnet_softc *);
-static void vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
-static void vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
-static int vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
-static void vtnet_vlan_tag_remove(struct mbuf *);
-static int vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
+static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
+static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
+static int vtnet_rxq_new_buf(struct vtnet_rxq *);
+static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
+ struct virtio_net_hdr *);
+static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
+static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
+static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
+static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
-static int vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
-static int vtnet_rxeof(struct vtnet_softc *, int, int *);
+static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
+static void vtnet_rxq_tq_intr(void *, int);
-static void vtnet_txeof(struct vtnet_softc *);
-static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
+static void vtnet_txq_free_mbufs(struct vtnet_txq *);
+static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
+ int *, int *, int *);
+static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
+ int, struct virtio_net_hdr *);
+static struct mbuf *
+ vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
struct virtio_net_hdr *);
-static int vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
+static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
struct vtnet_tx_header *);
-static int vtnet_encap(struct vtnet_softc *, struct mbuf **);
-static void vtnet_start_locked(struct ifnet *);
+static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
+#ifdef VTNET_LEGACY_TX
+static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
static void vtnet_start(struct ifnet *);
-static void vtnet_tick(void *);
+#else
+static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
+static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
+static void vtnet_txq_tq_deferred(void *, int);
+#endif
+static void vtnet_txq_tq_intr(void *, int);
+static void vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
+static void vtnet_tx_start_all(struct vtnet_softc *);
+
+#ifndef VTNET_LEGACY_TX
+static void vtnet_qflush(struct ifnet *);
+#endif
+
+static int vtnet_watchdog(struct vtnet_txq *);
+static void vtnet_rxq_accum_stats(struct vtnet_rxq *,
+ struct vtnet_rxq_stats *);
+static void vtnet_txq_accum_stats(struct vtnet_txq *,
+ struct vtnet_txq_stats *);
+static void vtnet_accumulate_stats(struct vtnet_softc *);
+static void vtnet_tick(void *);
+static void vtnet_start_taskqueues(struct vtnet_softc *);
+static void vtnet_free_taskqueues(struct vtnet_softc *);
+static void vtnet_drain_taskqueues(struct vtnet_softc *);
+
+static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
+static void vtnet_stop_rendezvous(struct vtnet_softc *);
static void vtnet_stop(struct vtnet_softc *);
+static int vtnet_virtio_reinit(struct vtnet_softc *);
+static void vtnet_init_rx_filters(struct vtnet_softc *);
+static int vtnet_init_rx_queues(struct vtnet_softc *);
+static int vtnet_init_tx_queues(struct vtnet_softc *);
+static int vtnet_init_rxtx_queues(struct vtnet_softc *);
+static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
static int vtnet_reinit(struct vtnet_softc *);
static void vtnet_init_locked(struct vtnet_softc *);
static void vtnet_init(void *);
+static void vtnet_free_ctrl_vq(struct vtnet_softc *);
static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
struct sglist *, int, int);
-
-static void vtnet_rx_filter(struct vtnet_softc *sc);
+static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
+static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
static int vtnet_set_promisc(struct vtnet_softc *, int);
static int vtnet_set_allmulti(struct vtnet_softc *, int);
+static void vtnet_attach_disable_promisc(struct vtnet_softc *);
+static void vtnet_rx_filter(struct vtnet_softc *);
static void vtnet_rx_filter_mac(struct vtnet_softc *);
-
static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_rx_filter_vlan(struct vtnet_softc *);
-static void vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t);
+static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_register_vlan(void *, struct ifnet *, uint16_t);
static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
+static int vtnet_is_link_up(struct vtnet_softc *);
+static void vtnet_update_link_status(struct vtnet_softc *);
static int vtnet_ifmedia_upd(struct ifnet *);
static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static void vtnet_get_hwaddr(struct vtnet_softc *);
+static void vtnet_set_hwaddr(struct vtnet_softc *);
+static void vtnet_vlan_tag_remove(struct mbuf *);
-static void vtnet_add_statistics(struct vtnet_softc *);
-
-static int vtnet_enable_rx_intr(struct vtnet_softc *);
-static int vtnet_enable_tx_intr(struct vtnet_softc *);
-static void vtnet_disable_rx_intr(struct vtnet_softc *);
-static void vtnet_disable_tx_intr(struct vtnet_softc *);
+static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_rxq *);
+static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_txq *);
+static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
+static void vtnet_setup_sysctl(struct vtnet_softc *);
+
+static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
+static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
+static int vtnet_txq_enable_intr(struct vtnet_txq *);
+static void vtnet_txq_disable_intr(struct vtnet_txq *);
+static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
+static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
+static void vtnet_enable_interrupts(struct vtnet_softc *);
+static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
+static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
+static void vtnet_disable_interrupts(struct vtnet_softc *);
+
+static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
/* Tunables. */
static int vtnet_csum_disable = 0;
@@ -171,16 +231,25 @@ static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
+static int vtnet_mq_disable = 0;
+TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
+static int vtnet_mq_max_pairs = 0;
+TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
+static int vtnet_rx_process_limit = 512;
+TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
/*
- * Reducing the number of transmit completed interrupts can
- * improve performance. To do so, the define below keeps the
- * Tx vq interrupt disabled and adds calls to vtnet_txeof()
- * in the start and watchdog paths. The price to pay for this
- * is the m_free'ing of transmitted mbufs may be delayed until
- * the watchdog fires.
+ * Reducing the number of transmit completed interrupts can improve
+ * performance. To do so, the define below keeps the Tx vq interrupt
+ * disabled and adds calls to vtnet_txeof() in the start and watchdog
+ * paths. The price to pay for this is the m_free'ing of transmitted
+ * mbufs may be delayed until the watchdog fires.
+ *
+ * BMV: Reintroduce this later as a run-time option, if it makes
+ * sense after the EVENT_IDX feature is supported.
+ *
+ * #define VTNET_TX_INTR_MODERATION
*/
-#define VTNET_TX_INTR_MODERATION
static uma_zone_t vtnet_tx_header_zone;
@@ -203,21 +272,25 @@ static struct virtio_feature_desc vtnet_feature_desc[] = {
{ VIRTIO_NET_F_CTRL_RX, "RxMode" },
{ VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" },
{ VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" },
+ { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
+ { VIRTIO_NET_F_MQ, "Multiqueue" },
+ { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" },
{ 0, NULL }
};
static device_method_t vtnet_methods[] = {
/* Device methods. */
- DEVMETHOD(device_probe, vtnet_probe),
- DEVMETHOD(device_attach, vtnet_attach),
- DEVMETHOD(device_detach, vtnet_detach),
- DEVMETHOD(device_suspend, vtnet_suspend),
- DEVMETHOD(device_resume, vtnet_resume),
- DEVMETHOD(device_shutdown, vtnet_shutdown),
+ DEVMETHOD(device_probe, vtnet_probe),
+ DEVMETHOD(device_attach, vtnet_attach),
+ DEVMETHOD(device_detach, vtnet_detach),
+ DEVMETHOD(device_suspend, vtnet_suspend),
+ DEVMETHOD(device_resume, vtnet_resume),
+ DEVMETHOD(device_shutdown, vtnet_shutdown),
/* VirtIO methods. */
- DEVMETHOD(virtio_config_change, vtnet_config_change),
+ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
+ DEVMETHOD(virtio_config_change, vtnet_config_change),
DEVMETHOD_END
};
@@ -282,56 +355,31 @@ static int
vtnet_attach(device_t dev)
{
struct vtnet_softc *sc;
- struct ifnet *ifp;
- int tx_size, error;
+ int error;
sc = device_get_softc(dev);
sc->vtnet_dev = dev;
- VTNET_LOCK_INIT(sc);
- callout_init_mtx(&sc->vtnet_tick_ch, VTNET_MTX(sc), 0);
-
- ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
- vtnet_ifmedia_sts);
- ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
- ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
-
- vtnet_add_statistics(sc);
-
+ /* Register our feature descriptions. */
virtio_set_feature_desc(dev, vtnet_feature_desc);
- vtnet_negotiate_features(sc);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
- sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
- sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- } else
- sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
- sc->vtnet_rx_mbuf_size = MCLBYTES;
- sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
- sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) {
- sc->vtnet_mac_filter = malloc(
- sizeof(struct vtnet_mac_filter), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (sc->vtnet_mac_filter == NULL) {
- device_printf(dev,
- "cannot allocate mac filter table\n");
- error = ENOMEM;
- goto fail;
- }
+ VTNET_CORE_LOCK_INIT(sc);
+ callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
- sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
- }
+ vtnet_setup_sysctl(sc);
+ vtnet_setup_features(sc);
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
- sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+ error = vtnet_alloc_rx_filters(sc);
+ if (error) {
+ device_printf(dev, "cannot allocate Rx filters\n");
+ goto fail;
}
- vtnet_get_hwaddr(sc);
+ error = vtnet_alloc_rxtx_queues(sc);
+ if (error) {
+ device_printf(dev, "cannot allocate queues\n");
+ goto fail;
+ }
error = vtnet_alloc_virtqueues(sc);
if (error) {
@@ -339,111 +387,21 @@ vtnet_attach(device_t dev)
goto fail;
}
- ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- device_printf(dev, "cannot allocate ifnet structure\n");
- error = ENOSPC;
+ error = vtnet_setup_interface(sc);
+ if (error) {
+ device_printf(dev, "cannot setup interface\n");
goto fail;
}
- ifp->if_softc = sc;
- if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = vtnet_init;
- ifp->if_start = vtnet_start;
- ifp->if_ioctl = vtnet_ioctl;
-
- sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
- sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
-
- tx_size = virtqueue_size(sc->vtnet_tx_vq);
- sc->vtnet_tx_size = tx_size;
- IFQ_SET_MAXLEN(&ifp->if_snd, tx_size - 1);
- ifp->if_snd.ifq_drv_maxlen = tx_size - 1;
- IFQ_SET_READY(&ifp->if_snd);
-
- ether_ifattach(ifp, sc->vtnet_hwaddr);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
- ifp->if_capabilities |= IFCAP_LINKSTATE;
-
- /* Tell the upper layer(s) we support long frames. */
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
- ifp->if_capabilities |= IFCAP_TXCSUM;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
- ifp->if_capabilities |= IFCAP_TSO4;
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
- ifp->if_capabilities |= IFCAP_TSO6;
- if (ifp->if_capabilities & IFCAP_TSO)
- ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
- sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
- }
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
- ifp->if_capabilities |= IFCAP_RXCSUM;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
- virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
- ifp->if_capabilities |= IFCAP_LRO;
- }
-
- if (ifp->if_capabilities & IFCAP_HWCSUM) {
- /*
- * VirtIO does not support VLAN tagging, but we can fake
- * it by inserting and removing the 802.1Q header during
- * transmit and receive. We are then able to do checksum
- * offloading of VLAN frames.
- */
- ifp->if_capabilities |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
- }
-
- ifp->if_capenable = ifp->if_capabilities;
-
- /*
- * Capabilities after here are not enabled by default.
- */
-
- if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
- ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
-
- sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
- vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
- sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
- vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
- }
-
-#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
-#endif
-
error = virtio_setup_intr(dev, INTR_TYPE_NET);
if (error) {
device_printf(dev, "cannot setup virtqueue interrupts\n");
- ether_ifdetach(ifp);
+		/* BMV: This will crash if called during boot! */
+ ether_ifdetach(sc->vtnet_ifp);
goto fail;
}
- /*
- * Device defaults to promiscuous mode for backwards
- * compatibility. Turn it off if possible.
- */
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
- VTNET_LOCK(sc);
- if (vtnet_set_promisc(sc, 0) != 0) {
- ifp->if_flags |= IFF_PROMISC;
- device_printf(dev,
- "cannot disable promiscuous mode\n");
- }
- VTNET_UNLOCK(sc);
- } else
- ifp->if_flags |= IFF_PROMISC;
+ vtnet_start_taskqueues(sc);
fail:
if (error)
@@ -461,24 +419,19 @@ vtnet_detach(device_t dev)
sc = device_get_softc(dev);
ifp = sc->vtnet_ifp;
- KASSERT(mtx_initialized(VTNET_MTX(sc)),
- ("vtnet mutex not initialized"));
-
-#ifdef DEVICE_POLLING
- if (ifp != NULL && ifp->if_capenable & IFCAP_POLLING)
- ether_poll_deregister(ifp);
-#endif
-
if (device_is_attached(dev)) {
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_stop(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
callout_drain(&sc->vtnet_tick_ch);
+ vtnet_drain_taskqueues(sc);
ether_ifdetach(ifp);
}
+ vtnet_free_taskqueues(sc);
+
if (sc->vtnet_vlan_attach != NULL) {
EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
sc->vtnet_vlan_attach = NULL;
@@ -488,25 +441,20 @@ vtnet_detach(device_t dev)
sc->vtnet_vlan_detach = NULL;
}
- if (sc->vtnet_mac_filter != NULL) {
- free(sc->vtnet_mac_filter, M_DEVBUF);
- sc->vtnet_mac_filter = NULL;
- }
+ ifmedia_removeall(&sc->vtnet_media);
if (ifp != NULL) {
if_free(ifp);
sc->vtnet_ifp = NULL;
}
- if (sc->vtnet_rx_vq != NULL)
- vtnet_free_rx_mbufs(sc);
- if (sc->vtnet_tx_vq != NULL)
- vtnet_free_tx_mbufs(sc);
+ vtnet_free_rxtx_queues(sc);
+ vtnet_free_rx_filters(sc);
+
if (sc->vtnet_ctrl_vq != NULL)
vtnet_free_ctrl_vq(sc);
- ifmedia_removeall(&sc->vtnet_media);
- VTNET_LOCK_DESTROY(sc);
+ VTNET_CORE_LOCK_DESTROY(sc);
return (0);
}
@@ -518,10 +466,10 @@ vtnet_suspend(device_t dev)
sc = device_get_softc(dev);
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_stop(sc);
sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -535,11 +483,11 @@ vtnet_resume(device_t dev)
sc = device_get_softc(dev);
ifp = sc->vtnet_ifp;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if (ifp->if_flags & IFF_UP)
vtnet_init_locked(sc);
sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -556,15 +504,26 @@ vtnet_shutdown(device_t dev)
}
static int
+vtnet_attach_completed(device_t dev)
+{
+
+ vtnet_attach_disable_promisc(device_get_softc(dev));
+
+ return (0);
+}
+
+static int
vtnet_config_change(device_t dev)
{
struct vtnet_softc *sc;
sc = device_get_softc(dev);
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_update_link_status(sc);
- VTNET_UNLOCK(sc);
+ if (sc->vtnet_link_active != 0)
+ vtnet_tx_start_all(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -578,188 +537,491 @@ vtnet_negotiate_features(struct vtnet_softc *sc)
dev = sc->vtnet_dev;
mask = 0;
- if (vtnet_csum_disable)
+ /*
+ * TSO and LRO are only available when their corresponding checksum
+ * offload feature is also negotiated.
+ */
+ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
+ mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
+ }
+ if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
+ mask |= VTNET_TSO_FEATURES;
+ if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
+ mask |= VTNET_LRO_FEATURES;
+ if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
+ mask |= VIRTIO_NET_F_MQ;
+#ifdef VTNET_LEGACY_TX
+ mask |= VIRTIO_NET_F_MQ;
+#endif
+
+ features = VTNET_FEATURES & ~mask;
+ sc->vtnet_features = virtio_negotiate_features(dev, features);
+
+ if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
+ return;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
+ return;
/*
- * TSO and LRO are only available when their corresponding
- * checksum offload feature is also negotiated.
+ * LRO without mergeable buffers requires special care. This is not
+ * ideal because every receive buffer must be large enough to hold
+	 * the maximum TCP packet, the Ethernet header, and the VirtIO net
+	 * header. This requires up to 34 descriptors with MCLBYTES clusters.
+	 * If we do not have indirect descriptors, LRO is disabled since the
+	 * virtqueue will not contain very many receive buffers.
*/
+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+ device_printf(dev,
+ "LRO disabled due to both mergeable buffers and indirect "
+ "descriptors not negotiated\n");
- if (vtnet_csum_disable || vtnet_tso_disable)
- mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
- VIRTIO_NET_F_HOST_ECN;
+ features &= ~VTNET_LRO_FEATURES;
+ sc->vtnet_features = virtio_negotiate_features(dev, features);
+ } else
+ sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
+}
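
A quick check of the 34-descriptor figure above: a maximum-size TCP/IP
packet is 65535 bytes, and 65535 / 2048 (MCLBYTES) rounds up to 32 clusters;
one more segment for the VirtIO net header plus slack for the Ethernet/VLAN
header brings a chain to roughly 34 descriptors. Without indirect
descriptors, each chain would occupy that many virtqueue slots directly,
leaving room for only a handful of receive buffers, hence the fallback above.
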
- if (vtnet_csum_disable || vtnet_lro_disable)
- mask |= VTNET_LRO_FEATURES;
+static void
+vtnet_setup_features(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int max_pairs, max;
- features = VTNET_FEATURES & ~mask;
-#ifdef VTNET_TX_INTR_MODERATION
- features |= VIRTIO_F_NOTIFY_ON_EMPTY;
-#endif
- sc->vtnet_features = virtio_negotiate_features(dev, features);
+ dev = sc->vtnet_dev;
+
+ vtnet_negotiate_features(sc);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
+ /* This feature should always be negotiated. */
+ sc->vtnet_flags |= VTNET_FLAG_MAC;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
+ sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
+ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ } else
+ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
- if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0 &&
- virtio_with_feature(dev, VTNET_LRO_FEATURES)) {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
+ sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
+ sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
+ max_pairs = virtio_read_dev_config_2(dev,
+ offsetof(struct virtio_net_config, max_virtqueue_pairs));
+ if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+ max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
+ max_pairs = 1;
+ } else
+ max_pairs = 1;
+
+ if (max_pairs > 1) {
/*
- * LRO without mergeable buffers requires special care. This
- * is not ideal because every receive buffer must be large
- * enough to hold the maximum TCP packet, the Ethernet header,
- * and the vtnet_rx_header. This requires up to 34 descriptors
- * when using MCLBYTES clusters. If we do not have indirect
- * descriptors, LRO is disabled since the virtqueue will not
- * be able to contain very many receive buffers.
+ * Limit the maximum number of queue pairs to the number of
+ * CPUs or the configured maximum. The actual number of
+ * queues that get used may be less.
*/
- if (virtio_with_feature(dev,
- VIRTIO_RING_F_INDIRECT_DESC) == 0) {
- device_printf(dev,
- "LRO disabled due to lack of both mergeable "
- "buffers and indirect descriptors\n");
-
- sc->vtnet_features = virtio_negotiate_features(dev,
- features & ~VTNET_LRO_FEATURES);
- } else
- sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
+ max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
+ if (max > 0 && max_pairs > max)
+ max_pairs = max;
+ if (max_pairs > mp_ncpus)
+ max_pairs = mp_ncpus;
+ if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
+ max_pairs = VTNET_MAX_QUEUE_PAIRS;
+ if (max_pairs > 1)
+ sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
}
+
+ sc->vtnet_max_vq_pairs = max_pairs;
}
static int
-vtnet_alloc_virtqueues(struct vtnet_softc *sc)
+vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
- device_t dev;
- struct vq_alloc_info vq_info[3];
- int nvqs, rxsegs;
+ struct vtnet_rxq *rxq;
- dev = sc->vtnet_dev;
- nvqs = 2;
+ rxq = &sc->vtnet_rxqs[id];
- /*
- * Indirect descriptors are not needed for the Rx
- * virtqueue when mergeable buffers are negotiated.
- * The header is placed inline with the data, not
- * in a separate descriptor, and mbuf clusters are
- * always physically contiguous.
- */
- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
- rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ?
- VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
- } else
- rxsegs = 0;
+ snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
+ device_get_nameunit(sc->vtnet_dev), id);
+ mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
- VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs,
- vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
- "%s receive", device_get_nameunit(dev));
+ rxq->vtnrx_sc = sc;
+ rxq->vtnrx_id = id;
- VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS,
- vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
- "%s transmit", device_get_nameunit(dev));
+ TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
+ rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
+ taskqueue_thread_enqueue, &rxq->vtnrx_tq);
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
- nvqs++;
+ return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
+}
+
+static int
+vtnet_init_txq(struct vtnet_softc *sc, int id)
+{
+ struct vtnet_txq *txq;
+
+ txq = &sc->vtnet_txqs[id];
+
+ snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
+ device_get_nameunit(sc->vtnet_dev), id);
+ mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
+
+ txq->vtntx_sc = sc;
+ txq->vtntx_id = id;
+
+#ifndef VTNET_LEGACY_TX
+ txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
+ M_NOWAIT, &txq->vtntx_mtx);
+ if (txq->vtntx_br == NULL)
+ return (ENOMEM);
+
+ TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
+#endif
+ TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
+ txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
+ taskqueue_thread_enqueue, &txq->vtntx_tq);
+ if (txq->vtntx_tq == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
- VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
- &sc->vtnet_ctrl_vq, "%s control",
- device_get_nameunit(dev));
+static int
+vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
+{
+ int i, npairs, error;
+
+ npairs = sc->vtnet_max_vq_pairs;
+
+ sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < npairs; i++) {
+ error = vtnet_init_rxq(sc, i);
+ if (error)
+ return (error);
+ error = vtnet_init_txq(sc, i);
+ if (error)
+ return (error);
}
- return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
+ vtnet_setup_queue_sysctl(sc);
+
+ return (0);
}
static void
-vtnet_get_hwaddr(struct vtnet_softc *sc)
+vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{
- device_t dev;
- dev = sc->vtnet_dev;
+ rxq->vtnrx_sc = NULL;
+ rxq->vtnrx_id = -1;
- if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
- virtio_read_device_config(dev,
- offsetof(struct virtio_net_config, mac),
- sc->vtnet_hwaddr, ETHER_ADDR_LEN);
- } else {
- /* Generate random locally administered unicast address. */
- sc->vtnet_hwaddr[0] = 0xB2;
- arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
+ if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
+ mtx_destroy(&rxq->vtnrx_mtx);
+}
- vtnet_set_hwaddr(sc);
+static void
+vtnet_destroy_txq(struct vtnet_txq *txq)
+{
+
+ txq->vtntx_sc = NULL;
+ txq->vtntx_id = -1;
+
+#ifndef VTNET_LEGACY_TX
+ if (txq->vtntx_br != NULL) {
+ buf_ring_free(txq->vtntx_br, M_DEVBUF);
+ txq->vtntx_br = NULL;
}
+#endif
+
+ if (mtx_initialized(&txq->vtntx_mtx) != 0)
+ mtx_destroy(&txq->vtntx_mtx);
}
static void
-vtnet_set_hwaddr(struct vtnet_softc *sc)
+vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
- device_t dev;
+ int i;
- dev = sc->vtnet_dev;
+ if (sc->vtnet_rxqs != NULL) {
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+ vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
+ free(sc->vtnet_rxqs, M_DEVBUF);
+ sc->vtnet_rxqs = NULL;
+ }
- virtio_write_device_config(dev,
- offsetof(struct virtio_net_config, mac),
- sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+ if (sc->vtnet_txqs != NULL) {
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+ vtnet_destroy_txq(&sc->vtnet_txqs[i]);
+ free(sc->vtnet_txqs, M_DEVBUF);
+ sc->vtnet_txqs = NULL;
+ }
}
static int
-vtnet_is_link_up(struct vtnet_softc *sc)
+vtnet_alloc_rx_filters(struct vtnet_softc *sc)
+{
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
+ sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (sc->vtnet_mac_filter == NULL)
+ return (ENOMEM);
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
+ sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
+ VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (sc->vtnet_vlan_filter == NULL)
+ return (ENOMEM);
+ }
+
+ return (0);
+}
+
+static void
+vtnet_free_rx_filters(struct vtnet_softc *sc)
+{
+
+ if (sc->vtnet_mac_filter != NULL) {
+ free(sc->vtnet_mac_filter, M_DEVBUF);
+ sc->vtnet_mac_filter = NULL;
+ }
+
+ if (sc->vtnet_vlan_filter != NULL) {
+ free(sc->vtnet_vlan_filter, M_DEVBUF);
+ sc->vtnet_vlan_filter = NULL;
+ }
+}
+
+static int
+vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
device_t dev;
- struct ifnet *ifp;
- uint16_t status;
+ struct vq_alloc_info *info;
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i, idx, flags, nvqs, rxsegs, error;
dev = sc->vtnet_dev;
- ifp = sc->vtnet_ifp;
+ flags = 0;
+
+ /*
+ * Indirect descriptors are not needed for the Rx virtqueue when
+ * mergeable buffers are negotiated. The header is placed inline
+ * with the data, not in a separate descriptor, and mbuf clusters
+ * are always physically contiguous.
+ */
+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+ rxsegs = 0;
+ else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+ rxsegs = VTNET_MAX_RX_SEGS;
+ else
+ rxsegs = VTNET_MIN_RX_SEGS;
- VTNET_LOCK_ASSERT(sc);
+ nvqs = sc->vtnet_max_vq_pairs * 2;
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
+ nvqs++;
- if ((ifp->if_capenable & IFCAP_LINKSTATE) == 0)
- return (1);
+	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
+ if (info == NULL)
+ return (ENOMEM);
- status = virtio_read_dev_config_2(dev,
- offsetof(struct virtio_net_config, status));
+	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
+ rxq = &sc->vtnet_rxqs[i];
+ VQ_ALLOC_INFO_INIT(&info[idx], rxsegs,
+ vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
+ "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
- return ((status & VIRTIO_NET_S_LINK_UP) != 0);
+ txq = &sc->vtnet_txqs[i];
+ VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS,
+ vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
+ "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
+ VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
+ &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
+ }
+
+ /*
+ * Enable interrupt binding if this is multiqueue. This only matters
+ * when per-vq MSIX is available.
+ */
+ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
+ flags |= 0;
+
+ error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
+ free(info, M_TEMP);
+
+ return (error);
}
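
The loop above fills vq_info[] in the order the VirtIO specification assigns
virtqueue indices: receive queue N at index 2N, its transmit partner at
2N + 1, and the control queue (when negotiated) last. For illustration only:

	rx_vq_index   = 2 * pair;
	tx_vq_index   = 2 * pair + 1;
	ctrl_vq_index = 2 * sc->vtnet_max_vq_pairs;   /* with VTNET_FLAG_CTRL_VQ */
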
-static void
-vtnet_update_link_status(struct vtnet_softc *sc)
+static int
+vtnet_setup_interface(struct vtnet_softc *sc)
{
+ device_t dev;
struct ifnet *ifp;
- int link;
+ int limit;
- ifp = sc->vtnet_ifp;
+ dev = sc->vtnet_dev;
- link = vtnet_is_link_up(sc);
+ ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ device_printf(dev, "cannot allocate ifnet structure\n");
+ return (ENOSPC);
+ }
- if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
- sc->vtnet_flags |= VTNET_FLAG_LINK;
- if_link_state_change(ifp, LINK_STATE_UP);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
- } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
- sc->vtnet_flags &= ~VTNET_FLAG_LINK;
- if_link_state_change(ifp, LINK_STATE_DOWN);
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ if_initbaudrate(ifp, IF_Gbps(10)); /* Approx. */
+ ifp->if_softc = sc;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = vtnet_init;
+ ifp->if_ioctl = vtnet_ioctl;
+
+#ifndef VTNET_LEGACY_TX
+ ifp->if_transmit = vtnet_txq_mq_start;
+ ifp->if_qflush = vtnet_qflush;
+#else
+ struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
+ ifp->if_start = vtnet_start;
+ IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
+ ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
+ IFQ_SET_READY(&ifp->if_snd);
+#endif
+
+ ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
+ vtnet_ifmedia_sts);
+ ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
+ ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
+
+ /* Read (or generate) the MAC address for the adapter. */
+ vtnet_get_hwaddr(sc);
+
+ ether_ifattach(ifp, sc->vtnet_hwaddr);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+
+ /* Tell the upper layer(s) we support long frames. */
+ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
+ ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
+ ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
+ sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
+ } else {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
+ ifp->if_capabilities |= IFCAP_TSO4;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
+ ifp->if_capabilities |= IFCAP_TSO6;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
+ sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
+ }
+
+ if (ifp->if_capabilities & IFCAP_TSO)
+ ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
+ ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+
+ if (ifp->if_capabilities & IFCAP_HWCSUM) {
+ /*
+ * VirtIO does not support VLAN tagging, but we can fake
+ * it by inserting and removing the 802.1Q header during
+ * transmit and receive. We are then able to do checksum
+ * offloading of VLAN frames.
+ */
+ ifp->if_capabilities |=
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
+ }
+
+ ifp->if_capenable = ifp->if_capabilities;
+
+ /*
+ * Capabilities after here are not enabled by default.
+ */
+
+ if (ifp->if_capabilities & IFCAP_RXCSUM) {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
+ ifp->if_capabilities |= IFCAP_LRO;
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
+ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
+
+ sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
+ sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
}
+
+ limit = vtnet_tunable_int(sc, "rx_process_limit",
+ vtnet_rx_process_limit);
+ if (limit < 0)
+ limit = INT_MAX;
+ sc->vtnet_rx_process_limit = limit;
+
+ return (0);
}
-static void
-vtnet_watchdog(struct vtnet_softc *sc)
+static int
+vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
{
struct ifnet *ifp;
+ int frame_size, clsize;
ifp = sc->vtnet_ifp;
-#ifdef VTNET_TX_INTR_MODERATION
- vtnet_txeof(sc);
-#endif
+ if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
+ return (EINVAL);
- if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
- return;
+ frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
+ new_mtu;
- if_printf(ifp, "watchdog timeout -- resetting\n");
-#ifdef VTNET_DEBUG
- virtqueue_dump(sc->vtnet_tx_vq);
-#endif
- ifp->if_oerrors++;
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- vtnet_init_locked(sc);
+ /*
+ * Based on the new MTU (and hence frame size) determine which
+ * cluster size is most appropriate for the receive queues.
+ */
+ if (frame_size <= MCLBYTES) {
+ clsize = MCLBYTES;
+ } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
+ /* Avoid going past 9K jumbos. */
+ if (frame_size > MJUM9BYTES)
+ return (EINVAL);
+ clsize = MJUM9BYTES;
+ } else
+ clsize = MJUMPAGESIZE;
+
+ ifp->if_mtu = new_mtu;
+ sc->vtnet_rx_new_clsize = clsize;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ vtnet_init_locked(sc);
+ }
+
+ return (0);
}
static int
@@ -771,22 +1033,19 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sc = ifp->if_softc;
ifr = (struct ifreq *) data;
- reinit = 0;
error = 0;
switch (cmd) {
case SIOCSIFMTU:
- if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
- error = EINVAL;
- else if (ifp->if_mtu != ifr->ifr_mtu) {
- VTNET_LOCK(sc);
+ if (ifp->if_mtu != ifr->ifr_mtu) {
+ VTNET_CORE_LOCK(sc);
error = vtnet_change_mtu(sc, ifr->ifr_mtu);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
break;
case SIOCSIFFLAGS:
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if ((ifp->if_flags & IFF_UP) == 0) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
vtnet_stop(sc);
@@ -803,16 +1062,17 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (error == 0)
sc->vtnet_if_flags = ifp->if_flags;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
- VTNET_LOCK(sc);
- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
+ break;
+ VTNET_CORE_LOCK(sc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
vtnet_rx_filter_mac(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
break;
case SIOCSIFMEDIA:
@@ -821,68 +1081,36 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
case SIOCSIFCAP:
+ VTNET_CORE_LOCK(sc);
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
-#ifdef DEVICE_POLLING
- if (mask & IFCAP_POLLING) {
- if (ifr->ifr_reqcap & IFCAP_POLLING) {
- error = ether_poll_register(vtnet_poll, ifp);
- if (error)
- break;
-
- VTNET_LOCK(sc);
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
- ifp->if_capenable |= IFCAP_POLLING;
- VTNET_UNLOCK(sc);
- } else {
- error = ether_poll_deregister(ifp);
-
- /* Enable interrupts even in error case. */
- VTNET_LOCK(sc);
- vtnet_enable_tx_intr(sc);
- vtnet_enable_rx_intr(sc);
- ifp->if_capenable &= ~IFCAP_POLLING;
- VTNET_UNLOCK(sc);
- }
- }
-#endif
- VTNET_LOCK(sc);
-
- if (mask & IFCAP_TXCSUM) {
+ if (mask & IFCAP_TXCSUM)
ifp->if_capenable ^= IFCAP_TXCSUM;
- if (ifp->if_capenable & IFCAP_TXCSUM)
- ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
- else
- ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
- }
-
- if (mask & IFCAP_TSO4) {
+ if (mask & IFCAP_TXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ if (mask & IFCAP_TSO4)
ifp->if_capenable ^= IFCAP_TSO4;
- if (ifp->if_capenable & IFCAP_TSO4)
- ifp->if_hwassist |= CSUM_TSO;
- else
- ifp->if_hwassist &= ~CSUM_TSO;
- }
-
- if (mask & IFCAP_RXCSUM) {
- ifp->if_capenable ^= IFCAP_RXCSUM;
- reinit = 1;
- }
+ if (mask & IFCAP_TSO6)
+ ifp->if_capenable ^= IFCAP_TSO6;
- if (mask & IFCAP_LRO) {
- ifp->if_capenable ^= IFCAP_LRO;
+ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
+ IFCAP_VLAN_HWFILTER)) {
+ /* These Rx features require us to renegotiate. */
reinit = 1;
- }
- if (mask & IFCAP_VLAN_HWFILTER) {
- ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
- reinit = 1;
- }
+ if (mask & IFCAP_RXCSUM)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ if (mask & IFCAP_RXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;
+ if (mask & IFCAP_VLAN_HWFILTER)
+ ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ } else
+ reinit = 0;
if (mask & IFCAP_VLAN_HWTSO)
ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
-
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
@@ -890,9 +1118,10 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
vtnet_init_locked(sc);
}
+
+ VTNET_CORE_UNLOCK(sc);
VLAN_CAPABILITIES(ifp);
- VTNET_UNLOCK(sc);
break;
default:
@@ -900,80 +1129,32 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- VTNET_LOCK_ASSERT_NOTOWNED(sc);
+ VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
return (error);
}
static int
-vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
-{
- struct ifnet *ifp;
- int new_frame_size, clsize;
-
- ifp = sc->vtnet_ifp;
-
- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
- new_frame_size = sizeof(struct vtnet_rx_header) +
- sizeof(struct ether_vlan_header) + new_mtu;
-
- if (new_frame_size > MJUM9BYTES)
- return (EINVAL);
-
- if (new_frame_size <= MCLBYTES)
- clsize = MCLBYTES;
- else
- clsize = MJUM9BYTES;
- } else {
- new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
- sizeof(struct ether_vlan_header) + new_mtu;
-
- if (new_frame_size <= MCLBYTES)
- clsize = MCLBYTES;
- else
- clsize = MJUMPAGESIZE;
- }
-
- sc->vtnet_rx_mbuf_size = clsize;
- sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
- KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
- ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
-
- ifp->if_mtu = new_mtu;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- vtnet_init_locked(sc);
- }
-
- return (0);
-}
-
-static int
-vtnet_init_rx_vq(struct vtnet_softc *sc)
+vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
struct virtqueue *vq;
int nbufs, error;
- vq = sc->vtnet_rx_vq;
- nbufs = 0;
+ vq = rxq->vtnrx_vq;
error = ENOSPC;
- while (!virtqueue_full(vq)) {
- if ((error = vtnet_newbuf(sc)) != 0)
+ for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
+ error = vtnet_rxq_new_buf(rxq);
+ if (error)
break;
- nbufs++;
}
if (nbufs > 0) {
virtqueue_notify(vq);
-
/*
* EMSGSIZE signifies the virtqueue did not have enough
* entries available to hold the last mbuf. This is not
- * an error. We should not get ENOSPC since we check if
- * the virtqueue is full before attempting to add a
- * buffer.
+ * an error.
*/
if (error == EMSGSIZE)
error = 0;
@@ -983,86 +1164,32 @@ vtnet_init_rx_vq(struct vtnet_softc *sc)
}
static void
-vtnet_free_rx_mbufs(struct vtnet_softc *sc)
+vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
struct virtqueue *vq;
struct mbuf *m;
int last;
- vq = sc->vtnet_rx_vq;
+ vq = rxq->vtnrx_vq;
last = 0;
while ((m = virtqueue_drain(vq, &last)) != NULL)
m_freem(m);
- KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
-}
-
-static void
-vtnet_free_tx_mbufs(struct vtnet_softc *sc)
-{
- struct virtqueue *vq;
- struct vtnet_tx_header *txhdr;
- int last;
-
- vq = sc->vtnet_tx_vq;
- last = 0;
-
- while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
- m_freem(txhdr->vth_mbuf);
- uma_zfree(vtnet_tx_header_zone, txhdr);
- }
-
- KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
-}
-
-static void
-vtnet_free_ctrl_vq(struct vtnet_softc *sc)
-{
-
- /*
- * The control virtqueue is only polled, therefore
- * it should already be empty.
- */
- KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
- ("Ctrl Vq not empty"));
-}
-
-#ifdef DEVICE_POLLING
-static int
-vtnet_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
-{
- struct vtnet_softc *sc;
- int rx_done;
-
- sc = ifp->if_softc;
- rx_done = 0;
-
- VTNET_LOCK(sc);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- if (cmd == POLL_AND_CHECK_STATUS)
- vtnet_update_link_status(sc);
-
- if (virtqueue_nused(sc->vtnet_rx_vq) > 0)
- vtnet_rxeof(sc, count, &rx_done);
-
- vtnet_txeof(sc);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
- }
- VTNET_UNLOCK(sc);
-
- return (rx_done);
+ KASSERT(virtqueue_empty(vq),
+ ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}
-#endif /* DEVICE_POLLING */
static struct mbuf *
-vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
+vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
struct mbuf *m_head, *m_tail, *m;
int i, clsize;
- clsize = sc->vtnet_rx_mbuf_size;
+ clsize = sc->vtnet_rx_clsize;
+
+ KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
+ ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
if (m_head == NULL)
@@ -1071,19 +1198,15 @@ vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
m_head->m_len = clsize;
m_tail = m_head;
- if (nbufs > 1) {
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
- ("chained Rx mbuf requested without LRO_NOMRG"));
-
- for (i = 1; i < nbufs; i++) {
- m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
- if (m == NULL)
- goto fail;
+ /* Allocate the rest of the chain. */
+ for (i = 1; i < nbufs; i++) {
+ m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
+ if (m == NULL)
+ goto fail;
- m->m_len = clsize;
- m_tail->m_next = m;
- m_tail = m;
- }
+ m->m_len = clsize;
+ m_tail->m_next = m;
+ m_tail = m;
}
if (m_tailp != NULL)
@@ -1098,43 +1221,48 @@ fail:
return (NULL);
}
+/*
+ * Slow path for when LRO without mergeable buffers is negotiated.
+ */
static int
-vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
+vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
+ int len0)
{
+ struct vtnet_softc *sc;
struct mbuf *m, *m_prev;
struct mbuf *m_new, *m_tail;
int len, clsize, nreplace, error;
- m = m0;
- m_prev = NULL;
- len = len0;
+ sc = rxq->vtnrx_sc;
+ clsize = sc->vtnet_rx_clsize;
+ m_prev = NULL;
m_tail = NULL;
- clsize = sc->vtnet_rx_mbuf_size;
nreplace = 0;
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ||
- m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG"));
+ m = m0;
+ len = len0;
/*
- * Since LRO_NOMRG mbuf chains are so large, we want to avoid
- * allocating an entire chain for each received frame. When
- * the received frame's length is less than that of the chain,
- * the unused mbufs are reassigned to the new chain.
+ * Since these mbuf chains are so large, we avoid allocating an
+ * entire replacement chain if possible. When the received frame
+ * did not consume the entire chain, the unused mbufs are moved
+ * to the replacement chain.
*/
while (len > 0) {
/*
- * Something is seriously wrong if we received
- * a frame larger than the mbuf chain. Drop it.
+ * Something is seriously wrong if we received a frame
+ * larger than the chain. Drop it.
*/
if (m == NULL) {
sc->vtnet_stats.rx_frame_too_large++;
return (EMSGSIZE);
}
+ /* We always allocate the same cluster size. */
KASSERT(m->m_len == clsize,
- ("mbuf length not expected cluster size: %d",
- m->m_len));
+ ("%s: mbuf size %d is not the cluster size %d",
+ __func__, m->m_len, clsize));
m->m_len = MIN(m->m_len, len);
len -= m->m_len;
@@ -1144,27 +1272,26 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
nreplace++;
}
- KASSERT(m_prev != NULL, ("m_prev == NULL"));
- KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
- ("too many replacement mbufs: %d/%d", nreplace,
- sc->vtnet_rx_mbuf_count));
+ KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
+ ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
+ sc->vtnet_rx_nmbufs));
- m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
+ m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
if (m_new == NULL) {
m_prev->m_len = clsize;
return (ENOBUFS);
}
/*
- * Move unused mbufs, if any, from the original chain
- * onto the end of the new chain.
+ * Move any unused mbufs from the received chain onto the end
+ * of the new chain.
*/
if (m_prev->m_next != NULL) {
m_tail->m_next = m_prev->m_next;
m_prev->m_next = NULL;
}
- error = vtnet_enqueue_rxbuf(sc, m_new);
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
if (error) {
/*
* BAD! We could not enqueue the replacement mbuf chain. We
@@ -1189,343 +1316,321 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
}
static int
-vtnet_newbuf(struct vtnet_softc *sc)
+vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
- struct mbuf *m;
+ struct vtnet_softc *sc;
+ struct mbuf *m_new;
int error;
- m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
- if (m == NULL)
- return (ENOBUFS);
+ sc = rxq->vtnrx_sc;
- error = vtnet_enqueue_rxbuf(sc, m);
- if (error)
- m_freem(m);
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
+ ("%s: chained mbuf without LRO_NOMRG", __func__));
- return (error);
-}
-
-static void
-vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
-{
- struct virtqueue *vq;
- struct mbuf *m;
-
- vq = sc->vtnet_rx_vq;
+ if (m->m_next == NULL) {
+ /* Fast-path for the common case of just one mbuf. */
+ if (m->m_len < len)
+ return (EINVAL);
- while (--nbufs > 0) {
- if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
- break;
- vtnet_discard_rxbuf(sc, m);
- }
-}
+ m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+ if (m_new == NULL)
+ return (ENOBUFS);
-static void
-vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
-{
- int error;
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
+ if (error) {
+ /*
+			 * The new mbuf is supposed to be an identical
+			 * copy of the one just dequeued, so this is an
+ * unexpected error.
+ */
+ m_freem(m_new);
+ sc->vtnet_stats.rx_enq_replacement_failed++;
+ } else
+ m->m_len = len;
+ } else
+ error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = vtnet_enqueue_rxbuf(sc, m);
- KASSERT(error == 0, ("cannot requeue discarded mbuf"));
+ return (error);
}
static int
-vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
+vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
struct sglist sg;
struct sglist_seg segs[VTNET_MAX_RX_SEGS];
+ struct vtnet_softc *sc;
struct vtnet_rx_header *rxhdr;
- struct virtio_net_hdr *hdr;
uint8_t *mdata;
int offset, error;
- VTNET_LOCK_ASSERT(sc);
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ||
- m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG"));
-
- sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
-
+ sc = rxq->vtnrx_sc;
mdata = mtod(m, uint8_t *);
- offset = 0;
+ VTNET_RXQ_LOCK_ASSERT(rxq);
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
+ ("%s: chained mbuf without LRO_NOMRG", __func__));
+ KASSERT(m->m_len == sc->vtnet_rx_clsize,
+ ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
+ sc->vtnet_rx_clsize));
+
+ sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
+ MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
rxhdr = (struct vtnet_rx_header *) mdata;
- hdr = &rxhdr->vrh_hdr;
- offset += sizeof(struct vtnet_rx_header);
-
- error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
- KASSERT(error == 0, ("cannot add header to sglist"));
- }
-
- error = sglist_append(&sg, mdata + offset, m->m_len - offset);
- if (error)
- return (error);
+ sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
+ offset = sizeof(struct vtnet_rx_header);
+ } else
+ offset = 0;
+ sglist_append(&sg, mdata + offset, m->m_len - offset);
if (m->m_next != NULL) {
error = sglist_append_mbuf(&sg, m->m_next);
- if (error)
- return (error);
+ MPASS(error == 0);
}
- return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
+ error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
+
+ return (error);
}
-static void
-vtnet_vlan_tag_remove(struct mbuf *m)
+static int
+vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
- struct ether_vlan_header *evl;
+ struct vtnet_softc *sc;
+ struct mbuf *m;
+ int error;
- evl = mtod(m, struct ether_vlan_header *);
+ sc = rxq->vtnrx_sc;
- m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
- m->m_flags |= M_VLANTAG;
+ m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
+ if (m == NULL)
+ return (ENOBUFS);
- /* Strip the 802.1Q header. */
- bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
- ETHER_HDR_LEN - ETHER_TYPE_LEN);
- m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ error = vtnet_rxq_enqueue_buf(rxq, m);
+ if (error)
+ m_freem(m);
+
+ return (error);
}
-#ifdef notyet
+/*
+ * Use the checksum offset in the VirtIO header to set the
+ * correct CSUM_* flags.
+ */
static int
-vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
- struct ether_header *eh;
- struct ether_vlan_header *evh;
- struct ip *ip;
- struct ip6_hdr *ip6;
- struct udphdr *udp;
- int ip_offset, csum_start, csum_offset, hlen;
- uint16_t eth_type;
- uint8_t ip_proto;
-
- /*
- * Convert the VirtIO checksum interface to FreeBSD's interface.
- * The host only provides us with the offset at which to start
- * checksumming, and the offset from that to place the completed
- * checksum. While this maps well with how Linux does checksums,
- * for FreeBSD, we must parse the received packet in order to set
- * the appropriate CSUM_* flags.
- */
-
- /*
- * Every mbuf added to the receive virtqueue is always at least
- * MCLBYTES big, so assume something is amiss if the first mbuf
- * does not contain both the Ethernet and protocol headers.
- */
- ip_offset = sizeof(struct ether_header);
- if (m->m_len < ip_offset)
- return (1);
+ struct vtnet_softc *sc;
+#if defined(INET) || defined(INET6)
+ int offset = hdr->csum_start + hdr->csum_offset;
+#endif
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- ip_offset = sizeof(struct ether_vlan_header);
- if (m->m_len < ip_offset)
- return (1);
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
- }
+ sc = rxq->vtnrx_sc;
+ /* Only do a basic sanity check on the offset. */
switch (eth_type) {
+#if defined(INET)
case ETHERTYPE_IP:
- if (m->m_len < ip_offset + sizeof(struct ip))
- return (1);
-
- ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
- /* Sanity check the IP header. */
- if (ip->ip_v != IPVERSION)
- return (1);
- hlen = ip->ip_hl << 2;
- if (hlen < sizeof(struct ip))
- return (1);
- if (ntohs(ip->ip_len) < hlen)
+ if (__predict_false(offset < ip_start + sizeof(struct ip)))
return (1);
- if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset))
- return (1);
-
- ip_proto = ip->ip_p;
- csum_start = ip_offset + hlen;
break;
-
+#endif
+#if defined(INET6)
case ETHERTYPE_IPV6:
- if (m->m_len < ip_offset + sizeof(struct ip6_hdr))
+ if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
return (1);
-
- /*
- * XXX FreeBSD does not handle any IPv6 checksum offloading
- * at the moment.
- */
-
- ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
- /* XXX Assume no extension headers are present. */
- ip_proto = ip6->ip6_nxt;
- csum_start = ip_offset + sizeof(struct ip6_hdr);
break;
-
+#endif
default:
sc->vtnet_stats.rx_csum_bad_ethtype++;
return (1);
}
- /* Assume checksum begins right after the IP header. */
- if (hdr->csum_start != csum_start) {
- sc->vtnet_stats.rx_csum_bad_start++;
- return (1);
- }
-
- switch (ip_proto) {
- case IPPROTO_TCP:
- csum_offset = offsetof(struct tcphdr, th_sum);
- break;
-
- case IPPROTO_UDP:
- csum_offset = offsetof(struct udphdr, uh_sum);
+ /*
+ * Use the offset to determine the appropriate CSUM_* flags. This is
+ * a bit dirty, but we can get by with it since the checksum offsets
+ * happen to be different. We assume the host does not do IPv4
+ * header checksum offloading.
+ */
+ switch (hdr->csum_offset) {
+ case offsetof(struct udphdr, uh_sum):
+ case offsetof(struct tcphdr, th_sum):
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
break;
-
- case IPPROTO_SCTP:
- csum_offset = offsetof(struct sctphdr, checksum);
+ case offsetof(struct sctphdr, checksum):
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
break;
-
default:
- sc->vtnet_stats.rx_csum_bad_ipproto++;
- return (1);
- }
-
- if (hdr->csum_offset != csum_offset) {
sc->vtnet_stats.rx_csum_bad_offset++;
return (1);
}
- /*
- * The IP header checksum is almost certainly valid but I'm
- * uncertain if that is guaranteed.
- *
- * m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
- */
+ return (0);
+}
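
The switch above is only sound because UDP, TCP, and SCTP keep their checksum fields at pairwise distinct offsets within their headers. A compile-time restatement of that assumption (not part of the patch; CTASSERT is the stock FreeBSD static assert):

	/* The by-offset dispatch relies on these three values differing. */
	CTASSERT(offsetof(struct udphdr, uh_sum) == 6);
	CTASSERT(offsetof(struct tcphdr, th_sum) == 16);
	CTASSERT(offsetof(struct sctphdr, checksum) == 8);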
- switch (ip_proto) {
- case IPPROTO_UDP:
- if (m->m_len < csum_start + sizeof(struct udphdr))
- return (1);
+static int
+vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int offset, proto;
- udp = (struct udphdr *)(mtod(m, uint8_t *) + csum_start);
- if (udp->uh_sum == 0)
- return (0);
+ sc = rxq->vtnrx_sc;
- /* FALLTHROUGH */
+ switch (eth_type) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip;
+ if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
+ return (1);
+ ip = (struct ip *)(m->m_data + ip_start);
+ proto = ip->ip_p;
+ offset = ip_start + (ip->ip_hl << 2);
+ break;
+ }
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ if (__predict_false(m->m_len < ip_start +
+ sizeof(struct ip6_hdr)))
+ return (1);
+ offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
+ if (__predict_false(offset < 0))
+ return (1);
+ break;
+#endif
+ default:
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ return (1);
+ }
+ switch (proto) {
case IPPROTO_TCP:
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
+ return (1);
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+ break;
+ case IPPROTO_UDP:
+ if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
+ return (1);
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xFFFF;
break;
-
case IPPROTO_SCTP:
+ if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
+ return (1);
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
break;
+ default:
+ /*
+ * For the remaining protocols, FreeBSD does not support
+ * checksum offloading, so the checksum will be recomputed.
+ */
+#if 0
+ if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
+ "protocol eth_type=%#x proto=%d csum_start=%d "
+ "csum_offset=%d\n", __func__, eth_type, proto,
+ hdr->csum_start, hdr->csum_offset);
+#endif
+ break;
}
- sc->vtnet_stats.rx_csum_offloaded++;
-
return (0);
}
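
On the parse path, ip6_lasthdr() walks the IPv6 extension header chain to locate the transport header. A worked example with a hypothetical frame layout:

	/*
	 * Ethernet (14) + IPv6 (40) + hop-by-hop option (8) + TCP:
	 *
	 *   offset = ip6_lasthdr(m, 14, IPPROTO_IPV6, &proto);
	 *     -> offset = 14 + 40 + 8 = 62 (start of the TCP header)
	 *     -> proto  = IPPROTO_TCP
	 */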
-#endif
/*
- * Alternative method of doing receive checksum offloading. Rather
- * than parsing the received frame down to the IP header, use the
- * csum_offset to determine which CSUM_* flags are appropriate. We
- * can get by with doing this only because the checksum offsets are
- * unique for the things we care about.
+ * Set the appropriate CSUM_* flags. Unfortunately, the information
+ * provided is not directly useful to us. The VirtIO header gives the
+ * offset of the checksum, which is all Linux needs, but this is not
+ * how FreeBSD does things. We are forced to peek inside the packet
+ * a bit.
+ *
+ * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
+ * could accept the offsets and let the stack figure it out.
*/
static int
-vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
struct virtio_net_hdr *hdr)
{
struct ether_header *eh;
struct ether_vlan_header *evh;
- struct udphdr *udp;
- int csum_len;
uint16_t eth_type;
-
- csum_len = hdr->csum_start + hdr->csum_offset;
-
- if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
- return (1);
- if (m->m_len < csum_len)
- return (1);
+ int offset, error;
eh = mtod(m, struct ether_header *);
eth_type = ntohs(eh->ether_type);
if (eth_type == ETHERTYPE_VLAN) {
+ /* BMV: We should handle nested VLAN tags too. */
evh = mtod(m, struct ether_vlan_header *);
eth_type = ntohs(evh->evl_proto);
- }
-
- if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
- sc->vtnet_stats.rx_csum_bad_ethtype++;
- return (1);
- }
-
- /* Use the offset to determine the appropriate CSUM_* flags. */
- switch (hdr->csum_offset) {
- case offsetof(struct udphdr, uh_sum):
- if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
- return (1);
- udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
- if (udp->uh_sum == 0)
- return (0);
+ offset = sizeof(struct ether_vlan_header);
+ } else
+ offset = sizeof(struct ether_header);
- /* FALLTHROUGH */
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
+ else
+ error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
- case offsetof(struct tcphdr, th_sum):
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
- break;
+ return (error);
+}
- case offsetof(struct sctphdr, checksum):
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
- break;
+static void
+vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
+{
+ struct mbuf *m;
- default:
- sc->vtnet_stats.rx_csum_bad_offset++;
- return (1);
+ while (--nbufs > 0) {
+ m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
+ if (m == NULL)
+ break;
+ vtnet_rxq_discard_buf(rxq, m);
}
+}
- sc->vtnet_stats.rx_csum_offloaded++;
+static void
+vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
+{
+ int error;
- return (0);
+ /*
+ * Requeue the discarded mbuf. This should always be successful
+ * since it was just dequeued.
+ */
+ error = vtnet_rxq_enqueue_buf(rxq, m);
+ KASSERT(error == 0,
+ ("%s: cannot requeue discarded mbuf %d", __func__, error));
}
static int
-vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
+vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
+ struct vtnet_softc *sc;
struct ifnet *ifp;
struct virtqueue *vq;
struct mbuf *m, *m_tail;
int len;
+ sc = rxq->vtnrx_sc;
+ vq = rxq->vtnrx_vq;
ifp = sc->vtnet_ifp;
- vq = sc->vtnet_rx_vq;
m_tail = m_head;
while (--nbufs > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL) {
- ifp->if_ierrors++;
+ rxq->vtnrx_stats.vrxs_ierrors++;
goto fail;
}
- if (vtnet_newbuf(sc) != 0) {
- ifp->if_iqdrops++;
- vtnet_discard_rxbuf(sc, m);
+ if (vtnet_rxq_new_buf(rxq) != 0) {
+ rxq->vtnrx_stats.vrxs_iqdrops++;
+ vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
- vtnet_discard_merged_rxbuf(sc, nbufs);
+ vtnet_rxq_discard_merged_bufs(rxq, nbufs);
goto fail;
}
@@ -1549,35 +1654,83 @@ fail:
return (1);
}
+static void
+vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ struct ifnet *ifp;
+ struct ether_header *eh;
+
+ sc = rxq->vtnrx_sc;
+ ifp = sc->vtnet_ifp;
+
+ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
+ eh = mtod(m, struct ether_header *);
+ if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
+ vtnet_vlan_tag_remove(m);
+ /*
+ * With the 802.1Q header removed, update the
+ * checksum starting location accordingly.
+ */
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
+ }
+ }
+
+ m->m_pkthdr.flowid = rxq->vtnrx_id;
+ m->m_flags |= M_FLOWID;
+
+ /*
+ * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
+ * distinction that Linux does. Need to reevaluate whether performing
+ * offloading for the NEEDS_CSUM case is really appropriate.
+ */
+ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
+ VIRTIO_NET_HDR_F_DATA_VALID)) {
+ if (vtnet_rxq_csum(rxq, m, hdr) == 0)
+ rxq->vtnrx_stats.vrxs_csum++;
+ else
+ rxq->vtnrx_stats.vrxs_csum_failed++;
+ }
+
+ rxq->vtnrx_stats.vrxs_ipackets++;
+ rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
+
+ /* VTNET_RXQ_UNLOCK(rxq); */
+ (*ifp->if_input)(ifp, m);
+ /* VTNET_RXQ_LOCK(rxq); */
+}
+
static int
-vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
+vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
- struct virtio_net_hdr lhdr;
+ struct virtio_net_hdr lhdr, *hdr;
+ struct vtnet_softc *sc;
struct ifnet *ifp;
struct virtqueue *vq;
struct mbuf *m;
- struct ether_header *eh;
- struct virtio_net_hdr *hdr;
struct virtio_net_hdr_mrg_rxbuf *mhdr;
- int len, deq, nbufs, adjsz, rx_npkts;
+ int len, deq, nbufs, adjsz, count;
+ sc = rxq->vtnrx_sc;
+ vq = rxq->vtnrx_vq;
ifp = sc->vtnet_ifp;
- vq = sc->vtnet_rx_vq;
hdr = &lhdr;
deq = 0;
- rx_npkts = 0;
+ count = sc->vtnet_rx_process_limit;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_RXQ_LOCK_ASSERT(rxq);
- while (--count >= 0) {
+ while (count-- > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL)
break;
deq++;
if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
- ifp->if_ierrors++;
- vtnet_discard_rxbuf(sc, m);
+ rxq->vtnrx_stats.vrxs_ierrors++;
+ vtnet_rxq_discard_buf(rxq, m);
continue;
}
@@ -1585,8 +1738,8 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
nbufs = 1;
adjsz = sizeof(struct vtnet_rx_header);
/*
- * Account for our pad between the header and
- * the actual start of the frame.
+ * Account for our pad inserted between the header
+ * and the actual start of the frame.
*/
len += VTNET_RX_HEADER_PAD;
} else {
@@ -1595,11 +1748,11 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
}
- if (vtnet_replace_rxbuf(sc, m, len) != 0) {
- ifp->if_iqdrops++;
- vtnet_discard_rxbuf(sc, m);
+ if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
+ rxq->vtnrx_stats.vrxs_iqdrops++;
+ vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
- vtnet_discard_merged_rxbuf(sc, nbufs);
+ vtnet_rxq_discard_merged_bufs(rxq, nbufs);
continue;
}
@@ -1608,263 +1761,297 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
m->m_pkthdr.csum_flags = 0;
if (nbufs > 1) {
- if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
+ /* Dequeue the rest of the chain. */
+ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
continue;
}
- ifp->if_ipackets++;
-
/*
* Save copy of header before we strip it. For both mergeable
- * and non-mergeable, the VirtIO header is placed first in the
- * mbuf's data. We no longer need num_buffers, so always use a
- * virtio_net_hdr.
+ * and non-mergeable, the header is at the beginning of the
+ * mbuf data. We no longer need num_buffers, so always use a
+ * regular header.
+ *
+ * BMV: Is this memcpy() expensive? We know the mbuf data is
+ * still valid even after the m_adj().
*/
memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
m_adj(m, adjsz);
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- eh = mtod(m, struct ether_header *);
- if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
- vtnet_vlan_tag_remove(m);
-
- /*
- * With the 802.1Q header removed, update the
- * checksum starting location accordingly.
- */
- if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- hdr->csum_start -=
- ETHER_VLAN_ENCAP_LEN;
- }
- }
-
- if (ifp->if_capenable & IFCAP_RXCSUM &&
- hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- if (vtnet_rx_csum(sc, m, hdr) != 0)
- sc->vtnet_stats.rx_csum_failed++;
- }
-
- VTNET_UNLOCK(sc);
- rx_npkts++;
- (*ifp->if_input)(ifp, m);
- VTNET_LOCK(sc);
-
- /*
- * The interface may have been stopped while we were
- * passing the packet up the network stack.
- */
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
- break;
+ vtnet_rxq_input(rxq, m, hdr);
}
if (deq > 0)
virtqueue_notify(vq);
- if (rx_npktsp != NULL)
- *rx_npktsp = rx_npkts;
-
return (count > 0 ? 0 : EAGAIN);
}
static void
-vtnet_rx_vq_intr(void *xsc)
+vtnet_rx_vq_intr(void *xrxq)
{
struct vtnet_softc *sc;
+ struct vtnet_rxq *rxq;
struct ifnet *ifp;
- int more;
+ int tries, more;
- sc = xsc;
+ rxq = xrxq;
+ sc = rxq->vtnrx_sc;
ifp = sc->vtnet_ifp;
+ tries = 0;
+
+ if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
+ /*
+ * Ignore this interrupt. Either this is a spurious interrupt
+ * or multiqueue without per-VQ MSIX, so every queue needs to
+ * be polled (a brain-dead configuration we could try harder
+ * to avoid).
+ */
+ vtnet_rxq_disable_intr(rxq);
+ return;
+ }
again:
- VTNET_LOCK(sc);
+ VTNET_RXQ_LOCK(rxq);
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- VTNET_UNLOCK(sc);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VTNET_RXQ_UNLOCK(rxq);
return;
}
-#endif
+
+ more = vtnet_rxq_eof(rxq);
+ if (more || vtnet_rxq_enable_intr(rxq) != 0) {
+ if (!more)
+ vtnet_rxq_disable_intr(rxq);
+ /*
+ * This is an occasional condition or race (when !more),
+ * so retry a few times before scheduling the taskqueue.
+ */
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ VTNET_RXQ_UNLOCK(rxq);
+ if (tries++ < VTNET_INTR_DISABLE_RETRIES)
+ goto again;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+ } else
+ VTNET_RXQ_UNLOCK(rxq);
+}
+
+static void
+vtnet_rxq_tq_intr(void *xrxq, int pending)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_rxq *rxq;
+ struct ifnet *ifp;
+ int more;
+
+ rxq = xrxq;
+ sc = rxq->vtnrx_sc;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_RXQ_LOCK(rxq);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
- vtnet_enable_rx_intr(sc);
- VTNET_UNLOCK(sc);
+ VTNET_RXQ_UNLOCK(rxq);
return;
}
- more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
- if (more || vtnet_enable_rx_intr(sc) != 0) {
+ more = vtnet_rxq_eof(rxq);
+ if (more || vtnet_rxq_enable_intr(rxq) != 0) {
if (!more)
- vtnet_disable_rx_intr(sc);
- sc->vtnet_stats.rx_task_rescheduled++;
- VTNET_UNLOCK(sc);
- goto again;
+ vtnet_rxq_disable_intr(rxq);
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
}
- VTNET_UNLOCK(sc);
+ VTNET_RXQ_UNLOCK(rxq);
}
static void
-vtnet_txeof(struct vtnet_softc *sc)
+vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
struct virtqueue *vq;
- struct ifnet *ifp;
struct vtnet_tx_header *txhdr;
- int deq;
-
- vq = sc->vtnet_tx_vq;
- ifp = sc->vtnet_ifp;
- deq = 0;
+ int last;
- VTNET_LOCK_ASSERT(sc);
+ vq = txq->vtntx_vq;
+ last = 0;
- while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
- deq++;
- ifp->if_opackets++;
+ while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
m_freem(txhdr->vth_mbuf);
uma_zfree(vtnet_tx_header_zone, txhdr);
}
- if (deq > 0) {
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- if (virtqueue_empty(vq))
- sc->vtnet_watchdog_timer = 0;
- }
+ KASSERT(virtqueue_empty(vq),
+ ("%s: mbufs remaining in tx queue %p", __func__, txq));
}
-static struct mbuf *
-vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+/*
+ * BMV: Much of this can go away once we finally have offsets in
+ * the mbuf packet header. Bug andre@.
+ */
+static int
+vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
+ int *etype, int *proto, int *start)
{
- struct ifnet *ifp;
- struct ether_header *eh;
+ struct vtnet_softc *sc;
struct ether_vlan_header *evh;
- struct ip *ip;
- struct ip6_hdr *ip6;
- struct tcphdr *tcp;
- int ip_offset;
- uint16_t eth_type, csum_start;
- uint8_t ip_proto, gso_type;
+ int offset;
- ifp = sc->vtnet_ifp;
+ sc = txq->vtntx_sc;
- ip_offset = sizeof(struct ether_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
+ evh = mtod(m, struct ether_vlan_header *);
+ if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ /* BMV: We should handle nested VLAN tags too. */
+ *etype = ntohs(evh->evl_proto);
+ offset = sizeof(struct ether_vlan_header);
+ } else {
+ *etype = ntohs(evh->evl_encap_proto);
+ offset = sizeof(struct ether_header);
+ }
+
+ switch (*etype) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip, iphdr;
+ if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
+ m_copydata(m, offset, sizeof(struct ip),
+ (caddr_t) &iphdr);
+ ip = &iphdr;
+ } else
+ ip = (struct ip *)(m->m_data + offset);
+ *proto = ip->ip_p;
+ *start = offset + (ip->ip_hl << 2);
+ break;
}
-
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- ip_offset = sizeof(struct ether_vlan_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
- }
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ *proto = -1;
+ *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
+ /* Assert the network stack sent us a valid packet. */
+ KASSERT(*start > offset,
+ ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
+ *start, offset, *proto));
+ break;
+#endif
+ default:
+ sc->vtnet_stats.tx_csum_bad_ethtype++;
+ return (EINVAL);
}
- switch (eth_type) {
- case ETHERTYPE_IP:
- if (m->m_len < ip_offset + sizeof(struct ip)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip));
- if (m == NULL)
- return (NULL);
- }
+ return (0);
+}
- ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
- ip_proto = ip->ip_p;
- csum_start = ip_offset + (ip->ip_hl << 2);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- break;
+static int
+vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
+ int offset, struct virtio_net_hdr *hdr)
+{
+ static struct timeval lastecn;
+ static int curecn;
+ struct vtnet_softc *sc;
+ struct tcphdr *tcp, tcphdr;
- case ETHERTYPE_IPV6:
- if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
+ m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
+ tcp = &tcphdr;
+ } else
+ tcp = (struct tcphdr *)(m->m_data + offset);
+
+ hdr->hdr_len = offset + (tcp->th_off << 2);
+ hdr->gso_size = m->m_pkthdr.tso_segsz;
+ hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
+ VIRTIO_NET_HDR_GSO_TCPV6;
- ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
+ if (tcp->th_flags & TH_CWR) {
/*
- * XXX Assume no extension headers are present. Presently,
- * this will always be true in the case of TSO, and FreeBSD
- * does not perform checksum offloading of IPv6 yet.
+ * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
+ * ECN support is not on a per-interface basis, but globally via
+ * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
*/
- ip_proto = ip6->ip6_nxt;
- csum_start = ip_offset + sizeof(struct ip6_hdr);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- break;
-
- default:
- return (m);
+ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
+ if (ppsratecheck(&lastecn, &curecn, 1))
+ if_printf(sc->vtnet_ifp,
+ "TSO with ECN not negotiated with host\n");
+ return (ENOTSUP);
+ }
+ hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
}
- if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
- hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = csum_start;
- hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_tso++;
- sc->vtnet_stats.tx_csum_offloaded++;
- }
+ return (0);
+}
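
A worked example of the TSO fields set above, with hypothetical values for an untagged IPv4/TCP packet carrying no IP or TCP options:

	/*
	 * offset        = 14 (Ethernet) + 20 (IPv4) = 34
	 * tcp->th_off   = 5, so hdr->hdr_len = 34 + 5 * 4 = 54
	 * hdr->gso_size = m->m_pkthdr.tso_segsz (e.g. 1460 on a 1500 MTU path)
	 * hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4
	 */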
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
- if (ip_proto != IPPROTO_TCP)
- return (m);
+static struct mbuf *
+vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int flags, etype, csum_start, proto, error;
- if (m->m_len < csum_start + sizeof(struct tcphdr)) {
- m = m_pullup(m, csum_start + sizeof(struct tcphdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+ flags = m->m_pkthdr.csum_flags;
- tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
- hdr->gso_type = gso_type;
- hdr->hdr_len = csum_start + (tcp->th_off << 2);
- hdr->gso_size = m->m_pkthdr.tso_segsz;
+ error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
+ if (error)
+ goto drop;
- if (tcp->th_flags & TH_CWR) {
- /*
- * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
- * ECN support is only configurable globally with the
- * net.inet.tcp.ecn.enable sysctl knob.
- */
- if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
- if_printf(ifp, "TSO with ECN not supported "
- "by host\n");
- m_freem(m);
- return (NULL);
- }
+ if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
+ (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
+ /*
+ * We could compare the IP protocol vs the CSUM_ flag too,
+ * but that really should not be necessary.
+ */
+ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = csum_start;
+ hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_csum++;
+ }
- hdr->flags |= VIRTIO_NET_HDR_GSO_ECN;
+ if (flags & CSUM_TSO) {
+ if (__predict_false(proto != IPPROTO_TCP)) {
+ /* Likely failed to correctly parse the mbuf. */
+ sc->vtnet_stats.tx_tso_not_tcp++;
+ goto drop;
}
- sc->vtnet_stats.tx_tso_offloaded++;
+ KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
+ ("%s: mbuf %p TSO without checksum offload", __func__, m));
+
+ error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
+ if (error)
+ goto drop;
}
return (m);
+
+drop:
+ m_freem(m);
+ return (NULL);
}
static int
-vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
+vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
struct vtnet_tx_header *txhdr)
{
struct sglist sg;
struct sglist_seg segs[VTNET_MAX_TX_SEGS];
+ struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m;
int collapsed, error;
- vq = sc->vtnet_tx_vq;
+ vq = txq->vtntx_vq;
+ sc = txq->vtntx_sc;
m = *m_head;
collapsed = 0;
sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
KASSERT(error == 0 && sg.sg_nseg == 1,
- ("%s: cannot add header to sglist error %d", __func__, error));
+ ("%s: error %d adding header to sglist", __func__, error));
again:
error = sglist_append_mbuf(&sg, m);
@@ -1878,12 +2065,14 @@ again:
*m_head = m;
collapsed = 1;
+ txq->vtntx_stats.vtxs_collapsed++;
goto again;
}
txhdr->vth_mbuf = m;
+ error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
- return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0));
+ return (error);
fail:
m_freem(*m_head);
@@ -1893,28 +2082,29 @@ fail:
}
static int
-vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
+vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
{
+ struct vtnet_softc *sc;
struct vtnet_tx_header *txhdr;
struct virtio_net_hdr *hdr;
struct mbuf *m;
int error;
+ sc = txq->vtntx_sc;
m = *m_head;
M_ASSERTPKTHDR(m);
txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
if (txhdr == NULL) {
- *m_head = NULL;
m_freem(m);
+ *m_head = NULL;
return (ENOMEM);
}
/*
- * Always use the non-mergeable header to simplify things. When
- * the mergeable feature is negotiated, the num_buffers field
- * must be set to zero. We use vtnet_hdr_size later to enqueue
- * the correct header size to the host.
+ * Always use the non-mergeable header, regardless of whether the feature
+ * was negotiated. For transmit, num_buffers is always zero. The
+ * vtnet_hdr_size is used to enqueue the correct header size.
*/
hdr = &txhdr->vth_uhdr.hdr;
@@ -1927,72 +2117,55 @@ vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
m->m_flags &= ~M_VLANTAG;
}
- if (m->m_pkthdr.csum_flags != 0) {
- m = vtnet_tx_offload(sc, m, hdr);
+ if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+ m = vtnet_txq_offload(txq, m, hdr);
if ((*m_head = m) == NULL) {
error = ENOBUFS;
goto fail;
}
}
- error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
+ error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
+ if (error == 0)
+ return (0);
+
fail:
- if (error)
- uma_zfree(vtnet_tx_header_zone, txhdr);
+ uma_zfree(vtnet_tx_header_zone, txhdr);
return (error);
}
-static void
-vtnet_start(struct ifnet *ifp)
-{
- struct vtnet_softc *sc;
-
- sc = ifp->if_softc;
-
- VTNET_LOCK(sc);
- vtnet_start_locked(ifp);
- VTNET_UNLOCK(sc);
-}
+#ifdef VTNET_LEGACY_TX
static void
-vtnet_start_locked(struct ifnet *ifp)
+vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
{
struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m0;
int enq;
- sc = ifp->if_softc;
- vq = sc->vtnet_tx_vq;
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
enq = 0;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_TXQ_LOCK_ASSERT(txq);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0))
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sc->vtnet_link_active == 0)
return;
-#ifdef VTNET_TX_INTR_MODERATION
- if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
- vtnet_txeof(sc);
-#endif
-
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- if (virtqueue_full(vq)) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (virtqueue_full(vq))
break;
- }
IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
if (m0 == NULL)
break;
- if (vtnet_encap(sc, &m0) != 0) {
- if (m0 == NULL)
- break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m0);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (vtnet_txq_encap(txq, &m0) != 0) {
+ if (m0 != NULL)
+ IFQ_DRV_PREPEND(&ifp->if_snd, m0);
break;
}
@@ -2002,65 +2175,529 @@ vtnet_start_locked(struct ifnet *ifp)
if (enq > 0) {
virtqueue_notify(vq);
- sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
}
}
static void
-vtnet_tick(void *xsc)
+vtnet_start(struct ifnet *ifp)
{
struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
- sc = xsc;
+ sc = ifp->if_softc;
+ txq = &sc->vtnet_txqs[0];
- VTNET_LOCK_ASSERT(sc);
-#ifdef VTNET_DEBUG
- virtqueue_dump(sc->vtnet_rx_vq);
- virtqueue_dump(sc->vtnet_tx_vq);
-#endif
+ VTNET_TXQ_LOCK(txq);
+ vtnet_start_locked(txq, ifp);
+ VTNET_TXQ_UNLOCK(txq);
+}
- vtnet_watchdog(sc);
- callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
+#else /* !VTNET_LEGACY_TX */
+
+static int
+vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
+{
+ struct vtnet_softc *sc;
+ struct virtqueue *vq;
+ struct buf_ring *br;
+ struct ifnet *ifp;
+ int enq, error;
+
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
+ br = txq->vtntx_br;
+ ifp = sc->vtnet_ifp;
+ enq = 0;
+ error = 0;
+
+ VTNET_TXQ_LOCK_ASSERT(txq);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sc->vtnet_link_active == 0) {
+ if (m != NULL)
+ error = drbr_enqueue(ifp, br, m);
+ return (error);
+ }
+
+ if (m != NULL) {
+ error = drbr_enqueue(ifp, br, m);
+ if (error)
+ return (error);
+ }
+
+ while ((m = drbr_peek(ifp, br)) != NULL) {
+ error = vtnet_txq_encap(txq, &m);
+ if (error) {
+ if (m != NULL)
+ drbr_putback(ifp, br, m);
+ else
+ drbr_advance(ifp, br);
+ break;
+ }
+ drbr_advance(ifp, br);
+
+ enq++;
+ ETHER_BPF_MTAP(ifp, m);
+ }
+
+ if (enq > 0) {
+ virtqueue_notify(vq);
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ }
+
+ return (error);
+}
+
+static int
+vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ int i, npairs, error;
+
+ sc = ifp->if_softc;
+ npairs = sc->vtnet_act_vq_pairs;
+
+ if (m->m_flags & M_FLOWID)
+ i = m->m_pkthdr.flowid % npairs;
+ else
+ i = curcpu % npairs;
+
+ txq = &sc->vtnet_txqs[i];
+
+ if (VTNET_TXQ_TRYLOCK(txq) != 0) {
+ error = vtnet_txq_mq_start_locked(txq, m);
+ VTNET_TXQ_UNLOCK(txq);
+ } else {
+ error = drbr_enqueue(ifp, txq->vtntx_br, m);
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
+ }
+
+ return (error);
}
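
The queue selection above is a plain modular mapping; two hypothetical examples with vtnet_act_vq_pairs = 4:

	/*
	 * M_FLOWID set, flowid = 42:  42 % 4 = 2  -> vtnet_txqs[2]
	 * M_FLOWID clear, curcpu = 5:  5 % 4 = 1  -> vtnet_txqs[1]
	 *
	 * Since a flow carries a stable flowid (stamped on receive in
	 * vtnet_rxq_input()), its packets keep mapping to the same
	 * transmit queue and stay ordered.
	 */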
static void
-vtnet_tx_vq_intr(void *xsc)
+vtnet_txq_tq_deferred(void *xtxq, int pending)
{
struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+
+ txq = xtxq;
+ sc = txq->vtntx_sc;
+
+ VTNET_TXQ_LOCK(txq);
+ if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+#endif /* VTNET_LEGACY_TX */
+
+static void
+vtnet_txq_tq_intr(void *xtxq, int pending)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
struct ifnet *ifp;
- sc = xsc;
+ txq = xtxq;
+ sc = txq->vtntx_sc;
ifp = sc->vtnet_ifp;
-again:
- VTNET_LOCK(sc);
+ VTNET_TXQ_LOCK(txq);
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- VTNET_UNLOCK(sc);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VTNET_TXQ_UNLOCK(txq);
return;
}
+
+ vtnet_txq_eof(txq);
+
+#ifdef VTNET_LEGACY_TX
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
#endif
+ if (vtnet_txq_enable_intr(txq) != 0) {
+ vtnet_txq_disable_intr(txq);
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
+ }
+
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+static void
+vtnet_txq_eof(struct vtnet_txq *txq)
+{
+ struct virtqueue *vq;
+ struct vtnet_tx_header *txhdr;
+ struct mbuf *m;
+
+ vq = txq->vtntx_vq;
+ VTNET_TXQ_LOCK_ASSERT(txq);
+
+ while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
+ m = txhdr->vth_mbuf;
+
+ txq->vtntx_stats.vtxs_opackets++;
+ txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
+ if (m->m_flags & M_MCAST)
+ txq->vtntx_stats.vtxs_omcasts++;
+
+ m_freem(m);
+ uma_zfree(vtnet_tx_header_zone, txhdr);
+ }
+
+ if (virtqueue_empty(vq))
+ txq->vtntx_watchdog = 0;
+}
+
+static void
+vtnet_tx_vq_intr(void *xtxq)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ struct ifnet *ifp;
+ int tries;
+
+ txq = xtxq;
+ sc = txq->vtntx_sc;
+ ifp = sc->vtnet_ifp;
+ tries = 0;
+
+ if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
+ /*
+ * Ignore this interrupt. Either this is a spurious interrupt
+ * or multiqueue without per-VQ MSIX, so every queue needs to
+ * be polled (a brain-dead configuration we could try harder
+ * to avoid).
+ */
+ vtnet_txq_disable_intr(txq);
+ return;
+ }
+
+again:
+ VTNET_TXQ_LOCK(txq);
+
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
- vtnet_enable_tx_intr(sc);
- VTNET_UNLOCK(sc);
+ VTNET_TXQ_UNLOCK(txq);
return;
}
- vtnet_txeof(sc);
+ vtnet_txq_eof(txq);
+#ifdef VTNET_LEGACY_TX
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+#endif
- if (vtnet_enable_tx_intr(sc) != 0) {
- vtnet_disable_tx_intr(sc);
- sc->vtnet_stats.tx_task_rescheduled++;
- VTNET_UNLOCK(sc);
- goto again;
+ if (vtnet_txq_enable_intr(txq) != 0) {
+ vtnet_txq_disable_intr(txq);
+ /*
+ * This is an occasional race, so retry a few times
+ * before scheduling the taskqueue.
+ */
+ VTNET_TXQ_UNLOCK(txq);
+ if (tries++ < VTNET_INTR_DISABLE_RETRIES)
+ goto again;
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
+ } else
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+static void
+vtnet_tx_start_all(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ struct vtnet_txq *txq;
+ int i;
+
+ ifp = sc->vtnet_ifp;
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+
+ VTNET_TXQ_LOCK(txq);
+#ifdef VTNET_LEGACY_TX
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+#endif
+ VTNET_TXQ_UNLOCK(txq);
+ }
+}
+
+#ifndef VTNET_LEGACY_TX
+static void
+vtnet_qflush(struct ifnet *ifp)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ struct mbuf *m;
+ int i;
+
+ sc = ifp->if_softc;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+
+ VTNET_TXQ_LOCK(txq);
+ while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
+ m_freem(m);
+ VTNET_TXQ_UNLOCK(txq);
+ }
+
+ if_qflush(ifp);
+}
+#endif
+
+static int
+vtnet_watchdog(struct vtnet_txq *txq)
+{
+ struct vtnet_softc *sc;
+
+ sc = txq->vtntx_sc;
+
+ VTNET_TXQ_LOCK(txq);
+ if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
+ VTNET_TXQ_UNLOCK(txq);
+ return (0);
+ }
+ VTNET_TXQ_UNLOCK(txq);
+
+ if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
+ txq->vtntx_id);
+ return (1);
+}
+
+static void
+vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
+{
+ struct vtnet_rxq_stats *st;
+
+ st = &rxq->vtnrx_stats;
+
+ accum->vrxs_ipackets += st->vrxs_ipackets;
+ accum->vrxs_ibytes += st->vrxs_ibytes;
+ accum->vrxs_iqdrops += st->vrxs_iqdrops;
+ accum->vrxs_ierrors += st->vrxs_ierrors;
+ accum->vrxs_csum += st->vrxs_csum;
+ accum->vrxs_csum_failed += st->vrxs_csum_failed;
+ accum->vrxs_rescheduled += st->vrxs_rescheduled;
+}
+
+static void
+vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
+{
+ struct vtnet_txq_stats *st;
+
+ st = &txq->vtntx_stats;
+
+ accum->vtxs_opackets += st->vtxs_opackets;
+ accum->vtxs_obytes += st->vtxs_obytes;
+ accum->vtxs_csum += st->vtxs_csum;
+ accum->vtxs_tso += st->vtxs_tso;
+ accum->vtxs_collapsed += st->vtxs_collapsed;
+ accum->vtxs_rescheduled += st->vtxs_rescheduled;
+}
+
+static void
+vtnet_accumulate_stats(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ struct vtnet_statistics *st;
+ struct vtnet_rxq_stats rxaccum;
+ struct vtnet_txq_stats txaccum;
+ int i;
+
+ ifp = sc->vtnet_ifp;
+ st = &sc->vtnet_stats;
+ bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
+ bzero(&txaccum, sizeof(struct vtnet_txq_stats));
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
+ vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
+ }
+
+ st->rx_csum_offloaded = rxaccum.vrxs_csum;
+ st->rx_csum_failed = rxaccum.vrxs_csum_failed;
+ st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
+ st->tx_csum_offloaded = txaccum.vtxs_csum;
+ st->tx_tso_offloaded = txaccum.vtxs_tso;
+ st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
+
+ /*
+ * With the exception of if_ierrors, these ifnet statistics are
+ * only updated in the driver, so just set them to our accumulated
+ * values. if_ierrors is updated in ether_input() for malformed
+ * frames that we should have already discarded.
+ */
+ ifp->if_ipackets = rxaccum.vrxs_ipackets;
+ ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
+ ifp->if_ierrors = rxaccum.vrxs_ierrors;
+ ifp->if_opackets = txaccum.vtxs_opackets;
+#ifndef VTNET_LEGACY_TX
+ ifp->if_obytes = txaccum.vtxs_obytes;
+ ifp->if_omcasts = txaccum.vtxs_omcasts;
+#endif
+}
+
+static void
+vtnet_tick(void *xsc)
+{
+ struct vtnet_softc *sc;
+ struct ifnet *ifp;
+ int i, timedout;
+
+ sc = xsc;
+ ifp = sc->vtnet_ifp;
+ timedout = 0;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+ vtnet_accumulate_stats(sc);
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
+
+ if (timedout != 0) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ vtnet_init_locked(sc);
+ } else
+ callout_schedule(&sc->vtnet_tick_ch, hz);
+}
+
+static void
+vtnet_start_taskqueues(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i, error;
+
+ dev = sc->vtnet_dev;
+
+ /*
+ * Errors here are very difficult to recover from - we cannot
+ * easily fail because, if this is during boot, we will hang
+ * when freeing any successfully started taskqueues because
+ * the scheduler isn't up yet.
+ *
+ * Most drivers just ignore the return value - it only fails
+ * with ENOMEM so an error is not likely.
+ */
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
+ "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
+ if (error) {
+ device_printf(dev, "failed to start rx taskq %d\n",
+ rxq->vtnrx_id);
+ }
+
+ txq = &sc->vtnet_txqs[i];
+ error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
+ "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
+ if (error) {
+ device_printf(dev, "failed to start tx taskq %d\n",
+ txq->vtntx_id);
+ }
+ }
+}
+
+static void
+vtnet_free_taskqueues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ if (rxq->vtnrx_tq != NULL) {
+ taskqueue_free(rxq->vtnrx_tq);
+ rxq->vtnrx_tq = NULL;
+ }
+
+ txq = &sc->vtnet_txqs[i];
+ if (txq->vtntx_tq != NULL) {
+ taskqueue_free(txq->vtntx_tq);
+ txq->vtntx_tq = NULL;
+ }
}
+}
- VTNET_UNLOCK(sc);
+static void
+vtnet_drain_taskqueues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ if (rxq->vtnrx_tq != NULL)
+ taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+
+ txq = &sc->vtnet_txqs[i];
+ if (txq->vtntx_tq != NULL) {
+ taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
+#ifndef VTNET_LEGACY_TX
+ taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
+#endif
+ }
+ }
+}
+
+static void
+vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ vtnet_rxq_free_mbufs(rxq);
+
+ txq = &sc->vtnet_txqs[i];
+ vtnet_txq_free_mbufs(txq);
+ }
+}
+
+static void
+vtnet_stop_rendezvous(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ /*
+ * Lock and unlock the per-queue mutex so we know the stop
+ * state is visible. Doing only the active queues should be
+ * sufficient, but it does not cost much extra to do all the
+ * queues. Note we hold the core mutex here too.
+ */
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ VTNET_RXQ_LOCK(rxq);
+ VTNET_RXQ_UNLOCK(rxq);
+
+ txq = &sc->vtnet_txqs[i];
+ VTNET_TXQ_LOCK(txq);
+ VTNET_TXQ_UNLOCK(txq);
+ }
}
static void
@@ -2072,46 +2709,60 @@ vtnet_stop(struct vtnet_softc *sc)
dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
- sc->vtnet_watchdog_timer = 0;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ sc->vtnet_link_active = 0;
callout_stop(&sc->vtnet_tick_ch);
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
+ /* Only advisory. */
+ vtnet_disable_interrupts(sc);
/*
- * Stop the host VirtIO adapter. Note this will reset the host
- * adapter's state back to the pre-initialized state, so in
- * order to make the device usable again, we must drive it
- * through virtio_reinit() and virtio_reinit_complete().
+ * Stop the host adapter. This resets it to the pre-initialized
+ * state. It will not generate any interrupts until after it is
+ * reinitialized.
*/
virtio_stop(dev);
+ vtnet_stop_rendezvous(sc);
- sc->vtnet_flags &= ~VTNET_FLAG_LINK;
-
- vtnet_free_rx_mbufs(sc);
- vtnet_free_tx_mbufs(sc);
+ /* Free any mbufs left in the virtqueues. */
+ vtnet_drain_rxtx_queues(sc);
}
static int
-vtnet_reinit(struct vtnet_softc *sc)
+vtnet_virtio_reinit(struct vtnet_softc *sc)
{
+ device_t dev;
struct ifnet *ifp;
uint64_t features;
+ int mask, error;
+ dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
features = sc->vtnet_features;
+ mask = 0;
+#if defined(INET)
+ mask |= IFCAP_RXCSUM;
+#endif
+#if defined(INET6)
+ mask |= IFCAP_RXCSUM_IPV6;
+#endif
+
/*
* Re-negotiate with the host, removing any disabled receive
* features. Transmit features are disabled only on our side
* via if_capenable and if_hwassist.
*/
- if (ifp->if_capabilities & IFCAP_RXCSUM) {
- if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
+ if (ifp->if_capabilities & mask) {
+ /*
+ * We require both IPv4 and IPv6 offloading to be enabled
+ * in order to negotiate it: VirtIO does not distinguish
+ * between the two.
+ */
+ if ((ifp->if_capenable & mask) != mask)
features &= ~VIRTIO_NET_F_GUEST_CSUM;
}
@@ -2125,86 +2776,205 @@ vtnet_reinit(struct vtnet_softc *sc)
features &= ~VIRTIO_NET_F_CTRL_VLAN;
}
- return (virtio_reinit(sc->vtnet_dev, features));
+ error = virtio_reinit(dev, features);
+ if (error)
+ device_printf(dev, "virtio reinit error %d\n", error);
+
+ return (error);
}
static void
-vtnet_init_locked(struct vtnet_softc *sc)
+vtnet_init_rx_filters(struct vtnet_softc *sc)
{
- device_t dev;
struct ifnet *ifp;
- int error;
- dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
- VTNET_LOCK_ASSERT(sc);
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
+ /* Restore promiscuous and all-multicast modes. */
+ vtnet_rx_filter(sc);
+ /* Restore filtered MAC addresses. */
+ vtnet_rx_filter_mac(sc);
+ }
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ vtnet_rx_filter_vlan(sc);
+}
+
+static int
+vtnet_init_rx_queues(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct vtnet_rxq *rxq;
+ int i, clsize, error;
+
+ dev = sc->vtnet_dev;
+
+ /*
+ * Use the new cluster size if one has been set (via an MTU
+ * change). Otherwise, use the standard 2K clusters.
+ *
+ * BMV: It might make sense to use page sized clusters as
+ * the default (depending on the features negotiated).
+ */
+ if (sc->vtnet_rx_new_clsize != 0) {
+ clsize = sc->vtnet_rx_new_clsize;
+ sc->vtnet_rx_new_clsize = 0;
+ } else
+ clsize = MCLBYTES;
+
+ sc->vtnet_rx_clsize = clsize;
+ sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
+
+ /* The first segment is reserved for the header. */
+ KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS,
+ ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs));
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+
+ /* Hold the lock to satisfy asserts. */
+ VTNET_RXQ_LOCK(rxq);
+ error = vtnet_rxq_populate(rxq);
+ VTNET_RXQ_UNLOCK(rxq);
+
+ if (error) {
+ device_printf(dev,
+ "cannot allocate mbufs for Rx queue %d\n", i);
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+static int
+vtnet_init_tx_queues(struct vtnet_softc *sc)
+{
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+ txq->vtntx_watchdog = 0;
+ }
+
+ return (0);
+}
+
+static int
+vtnet_init_rxtx_queues(struct vtnet_softc *sc)
+{
+ int error;
+
+ error = vtnet_init_rx_queues(sc);
+ if (error)
+ return (error);
+
+ error = vtnet_init_tx_queues(sc);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+static void
+vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int npairs;
+
+ dev = sc->vtnet_dev;
+
+ if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
+ MPASS(sc->vtnet_max_vq_pairs == 1);
+ sc->vtnet_act_vq_pairs = 1;
return;
+ }
- /* Stop host's adapter, cancel any pending I/O. */
- vtnet_stop(sc);
+ /* BMV: Just use the maximum configured for now. */
+ npairs = sc->vtnet_max_vq_pairs;
- /* Reinitialize the host device. */
- error = vtnet_reinit(sc);
- if (error) {
+ if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
device_printf(dev,
- "reinitialization failed, stopping device...\n");
- vtnet_stop(sc);
- return;
+ "cannot set active queue pairs to %d\n", npairs);
+ npairs = 1;
}
- /* Update host with assigned MAC address. */
+ sc->vtnet_act_vq_pairs = npairs;
+}
+
+static int
+vtnet_reinit(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+ int error;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ /* Use the current MAC address. */
bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
vtnet_set_hwaddr(sc);
+ vtnet_set_active_vq_pairs(sc);
+
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_TSO;
+ if (ifp->if_capenable & IFCAP_TSO6)
+ ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
- error = vtnet_init_rx_vq(sc);
- if (error) {
- device_printf(dev,
- "cannot allocate mbufs for Rx virtqueue\n");
- vtnet_stop(sc);
- return;
- }
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
+ vtnet_init_rx_filters(sc);
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
- /* Restore promiscuous and all-multicast modes. */
- vtnet_rx_filter(sc);
+ error = vtnet_init_rxtx_queues(sc);
+ if (error)
+ return (error);
- /* Restore filtered MAC addresses. */
- vtnet_rx_filter_mac(sc);
- }
+ vtnet_enable_interrupts(sc);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
- /* Restore VLAN filters. */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
- vtnet_rx_filter_vlan(sc);
- }
+ return (0);
+}
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
- } else
-#endif
- {
- vtnet_enable_rx_intr(sc);
- vtnet_enable_tx_intr(sc);
- }
+static void
+vtnet_init_locked(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ vtnet_stop(sc);
+
+ /* Reinitialize with the host. */
+ if (vtnet_virtio_reinit(sc) != 0)
+ goto fail;
+
+ if (vtnet_reinit(sc) != 0)
+ goto fail;
virtio_reinit_complete(dev);
vtnet_update_link_status(sc);
callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
+
+ return;
+
+fail:
+ vtnet_stop(sc);
}
static void
@@ -2214,9 +2984,24 @@ vtnet_init(void *xsc)
sc = xsc;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_init_locked(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
+}
+
+static void
+vtnet_free_ctrl_vq(struct vtnet_softc *sc)
+{
+ struct virtqueue *vq;
+
+ vq = sc->vtnet_ctrl_vq;
+
+ /*
+ * The control virtqueue is only polled and therefore it should
+ * already be empty.
+ */
+ KASSERT(virtqueue_empty(vq),
+ ("%s: ctrl vq %p not empty", __func__, vq));
}
static void
@@ -2224,87 +3009,117 @@ vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
struct sglist *sg, int readable, int writable)
{
struct virtqueue *vq;
- void *c;
vq = sc->vtnet_ctrl_vq;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
- ("no control virtqueue"));
- KASSERT(virtqueue_empty(vq),
- ("control command already enqueued"));
+ ("%s: CTRL_VQ feature not negotiated", __func__));
+ if (!virtqueue_empty(vq))
+ return;
if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
return;
- virtqueue_notify(vq);
-
/*
- * Poll until the command is complete. Previously, we would
- * sleep until the control virtqueue interrupt handler woke
- * us up, but dropping the VTNET_MTX leads to serialization
- * difficulties.
- *
- * Furthermore, it appears QEMU/KVM only allocates three MSIX
- * vectors. Two of those vectors are needed for the Rx and Tx
- * virtqueues. We do not support sharing both a Vq and config
- * changed notification on the same MSIX vector.
+ * Poll for the response, but the command is likely already
+ * done when we return from the notify.
*/
- c = virtqueue_poll(vq, NULL);
- KASSERT(c == cookie, ("unexpected control command response"));
+ virtqueue_notify(vq);
+ virtqueue_poll(vq, NULL);
}
-static void
-vtnet_rx_filter(struct vtnet_softc *sc)
+static int
+vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
{
- device_t dev;
- struct ifnet *ifp;
+ struct virtio_net_ctrl_hdr hdr;
+ struct sglist_seg segs[3];
+ struct sglist sg;
+ uint8_t ack;
+ int error;
- dev = sc->vtnet_dev;
- ifp = sc->vtnet_ifp;
+ hdr.class = VIRTIO_NET_CTRL_MAC;
+ hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
+ ack = VIRTIO_NET_ERR;
- VTNET_LOCK_ASSERT(sc);
- KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
- ("CTRL_RX feature not negotiated"));
+ sglist_init(&sg, 3, segs);
+ error = 0;
+ error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
+ error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ KASSERT(error == 0 && sg.sg_nseg == 3,
+ ("%s: error %d adding set MAC msg to sglist", __func__, error));
- if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
- device_printf(dev, "cannot %s promiscuous mode\n",
- ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
+ vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
- if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
- device_printf(dev, "cannot %s all-multicast mode\n",
- ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
+ return (ack == VIRTIO_NET_OK ? 0 : EIO);
}
static int
-vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
+vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
{
- struct virtio_net_ctrl_hdr hdr;
struct sglist_seg segs[3];
struct sglist sg;
- uint8_t onoff, ack;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ struct virtio_net_ctrl_mq mq;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
int error;
- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
- return (ENOTSUP);
+ s.hdr.class = VIRTIO_NET_CTRL_MQ;
+ s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+ s.mq.virtqueue_pairs = npairs;
+ s.ack = VIRTIO_NET_ERR;
+ sglist_init(&sg, 3, segs);
error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
+ KASSERT(error == 0 && sg.sg_nseg == 3,
+ ("%s: error %d adding MQ message to sglist", __func__, error));
- hdr.class = VIRTIO_NET_CTRL_RX;
- hdr.cmd = cmd;
- onoff = !!on;
- ack = VIRTIO_NET_ERR;
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
+
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
+}
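
A sketch of the descriptor layout the device sees for the MQ command, assuming the usual VirtIO convention that the readable segments (here sg.sg_nseg - 1 = 2) are driver-to-device and the final writable segment is device-to-driver:

	/*
	 * seg 0: s.hdr  class/cmd (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET)   readable
	 * seg 1: s.mq   virtqueue_pairs = npairs                      readable
	 * seg 2: s.ack  device writes VIRTIO_NET_OK or VIRTIO_NET_ERR writable
	 */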
+
+static int
+vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
+{
+ struct sglist_seg segs[3];
+ struct sglist sg;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ uint8_t onoff;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
+ int error;
+
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
+ ("%s: CTRL_RX feature not negotiated", __func__));
+
+ s.hdr.class = VIRTIO_NET_CTRL_RX;
+ s.hdr.cmd = cmd;
+ s.onoff = !!on;
+ s.ack = VIRTIO_NET_ERR;
sglist_init(&sg, 3, segs);
- error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
- error |= sglist_append(&sg, &onoff, sizeof(uint8_t));
- error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 3,
- ("error adding Rx filter message to sglist"));
+ ("%s: error %d adding Rx message to sglist", __func__, error));
- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
- return (ack == VIRTIO_NET_OK ? 0 : EIO);
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
}
static int
@@ -2321,6 +3136,48 @@ vtnet_set_allmulti(struct vtnet_softc *sc, int on)
return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
}
+/*
+ * The device defaults to promiscuous mode for backwards compatibility.
+ * Turn it off at attach time if possible.
+ */
+static void
+vtnet_attach_disable_promisc(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK(sc);
+ if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
+ ifp->if_flags |= IFF_PROMISC;
+ } else if (vtnet_set_promisc(sc, 0) != 0) {
+ ifp->if_flags |= IFF_PROMISC;
+ device_printf(sc->vtnet_dev,
+ "cannot disable default promiscuous mode\n");
+ }
+ VTNET_CORE_UNLOCK(sc);
+}
+
+static void
+vtnet_rx_filter(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
+ device_printf(dev, "cannot %s promiscuous mode\n",
+ ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
+
+ if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
+ device_printf(dev, "cannot %s all-multicast mode\n",
+ ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
+}
+
static void
vtnet_rx_filter_mac(struct vtnet_softc *sc)
{
@@ -2340,19 +3197,23 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
mcnt = 0;
promisc = 0;
allmulti = 0;
- error = 0;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
- ("CTRL_RX feature not negotiated"));
+ ("%s: CTRL_RX feature not negotiated", __func__));
/* Unicast MAC addresses: */
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
- else if (ucnt == VTNET_MAX_MAC_ENTRIES)
+ else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
+ continue;
+ else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
+ promisc = 1;
break;
+ }
bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
&filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
@@ -2360,10 +3221,8 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
}
if_addr_runlock(ifp);
- if (ucnt >= VTNET_MAX_MAC_ENTRIES) {
- promisc = 1;
+ if (promisc != 0) {
filter->vmf_unicast.nentries = 0;
-
if_printf(ifp, "more than %d MAC addresses assigned, "
"falling back to promiscuous mode\n",
VTNET_MAX_MAC_ENTRIES);
@@ -2375,8 +3234,10 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- else if (mcnt == VTNET_MAX_MAC_ENTRIES)
+ else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
+ allmulti = 1;
break;
+ }
bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
&filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
@@ -2384,17 +3245,15 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
}
if_maddr_runlock(ifp);
- if (mcnt >= VTNET_MAX_MAC_ENTRIES) {
- allmulti = 1;
+ if (allmulti != 0) {
filter->vmf_multicast.nentries = 0;
-
if_printf(ifp, "more than %d multicast MAC addresses "
"assigned, falling back to all-multicast mode\n",
VTNET_MAX_MAC_ENTRIES);
} else
filter->vmf_multicast.nentries = mcnt;
- if (promisc && allmulti)
+ if (promisc != 0 && allmulti != 0)
goto out;
hdr.class = VIRTIO_NET_CTRL_MAC;
@@ -2402,6 +3261,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
ack = VIRTIO_NET_ERR;
sglist_init(&sg, 4, segs);
+ error = 0;
error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
error |= sglist_append(&sg, &filter->vmf_unicast,
sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
@@ -2409,7 +3269,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
error |= sglist_append(&sg, &ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 4,
- ("error adding MAC filtering message to sglist"));
+ ("%s: error %d adding MAC filter msg to sglist", __func__, error));
vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
@@ -2417,111 +3277,99 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
if_printf(ifp, "error setting host MAC filter table\n");
out:
- if (promisc)
- if (vtnet_set_promisc(sc, 1) != 0)
- if_printf(ifp, "cannot enable promiscuous mode\n");
- if (allmulti)
- if (vtnet_set_allmulti(sc, 1) != 0)
- if_printf(ifp, "cannot enable all-multicast mode\n");
+ if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
+ if_printf(ifp, "cannot enable promiscuous mode\n");
+ if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
+ if_printf(ifp, "cannot enable all-multicast mode\n");
}
static int
vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
{
- struct virtio_net_ctrl_hdr hdr;
struct sglist_seg segs[3];
struct sglist sg;
- uint8_t ack;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ uint16_t tag;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
int error;
- hdr.class = VIRTIO_NET_CTRL_VLAN;
- hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
- ack = VIRTIO_NET_ERR;
- error = 0;
+ s.hdr.class = VIRTIO_NET_CTRL_VLAN;
+ s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
+ s.tag = tag;
+ s.ack = VIRTIO_NET_ERR;
sglist_init(&sg, 3, segs);
- error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
- error |= sglist_append(&sg, &tag, sizeof(uint16_t));
- error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 3,
- ("error adding VLAN control message to sglist"));
+ ("%s: error %d adding VLAN message to sglist", __func__, error));
- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
- return (ack == VIRTIO_NET_OK ? 0 : EIO);
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
}
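
Everything the command touches lives in the single on-stack struct s, so the header, tag, and ack segments all stay valid while vtnet_exec_ctrl_cmd() blocks for the host's reply; the call passes sg.sg_nseg - 1 device-readable segments plus one device-writable segment for the ack. A standalone userland sketch of that readable-then-writable split, with assumed values and hypothetical names (not driver code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	const void	*base;
	size_t		 len;
	int		 writable;
};

int
main(void)
{
	uint8_t hdr[2] = { 2, 0 };	/* class, cmd: values assumed */
	uint16_t tag = 100;
	uint8_t ack = 0xFF;		/* host overwrites with OK/ERR */
	struct seg sg[] = {
		{ hdr,  sizeof(hdr), 0 },	/* device-readable */
		{ &tag, sizeof(tag), 0 },	/* device-readable */
		{ &ack, sizeof(ack), 1 },	/* device-writable reply */
	};
	size_t i;

	for (i = 0; i < sizeof(sg) / sizeof(sg[0]); i++)
		printf("seg %zu: %zu byte(s), %s\n", i, sg[i].len,
		    sg[i].writable ? "writable" : "readable");
	return (0);
}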
static void
vtnet_rx_filter_vlan(struct vtnet_softc *sc)
{
- device_t dev;
- uint32_t w, mask;
+ uint32_t w;
uint16_t tag;
- int i, nvlans, error;
+ int i, bit;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
- ("VLAN_FILTER feature not negotiated"));
+ ("%s: VLAN_FILTER feature not negotiated", __func__));
- dev = sc->vtnet_dev;
- nvlans = sc->vtnet_nvlans;
- error = 0;
+ /* Enable the filter for each configured VLAN. */
+ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
+ w = sc->vtnet_vlan_filter[i];
+
+ while ((bit = ffs(w) - 1) != -1) {
+ w &= ~(1 << bit);
+ tag = sizeof(w) * CHAR_BIT * i + bit;
- /* Enable filtering for each configured VLAN. */
- for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
- w = sc->vtnet_vlan_shadow[i];
- for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) {
- if ((w & mask) != 0) {
- w &= ~mask;
- nvlans--;
- if (vtnet_exec_vlan_filter(sc, 1, tag) != 0)
- error++;
+ if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
+ device_printf(sc->vtnet_dev,
+ "cannot enable VLAN %d filter\n", tag);
}
}
}
-
- KASSERT(nvlans == 0, ("VLAN count incorrect"));
- if (error)
- device_printf(dev, "cannot restore VLAN filter table\n");
}
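
The loop walks each 32-bit filter word with ffs(3), clearing one set bit per iteration and recovering the VLAN tag as 32 * i + bit. A minimal userland sketch of the same traversal (illustrative filter contents only):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs(3) */

int
main(void)
{
	/* Bits 1 and 10 of word 0 and bit 3 of word 1: VLANs 1, 10, 35. */
	uint32_t filter[2] = { (1u << 1) | (1u << 10), (1u << 3) };
	int i, bit;

	for (i = 0; i < 2; i++) {
		uint32_t w = filter[i];

		while ((bit = ffs(w) - 1) != -1) {
			w &= ~(1u << bit);
			printf("enable VLAN %d\n", 32 * i + bit);
		}
	}
	return (0);
}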
static void
-vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
+vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
{
struct ifnet *ifp;
int idx, bit;
- KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
- ("VLAN_FILTER feature not negotiated"));
-
- if ((tag == 0) || (tag > 4095))
- return;
-
ifp = sc->vtnet_ifp;
idx = (tag >> 5) & 0x7F;
bit = tag & 0x1F;
- VTNET_LOCK(sc);
+ if (tag == 0 || tag > 4095)
+ return;
+
+ VTNET_CORE_LOCK(sc);
- /* Update shadow VLAN table. */
- if (add) {
- sc->vtnet_nvlans++;
- sc->vtnet_vlan_shadow[idx] |= (1 << bit);
- } else {
- sc->vtnet_nvlans--;
- sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
- }
+ if (add)
+ sc->vtnet_vlan_filter[idx] |= (1 << bit);
+ else
+ sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
- if (vtnet_exec_vlan_filter(sc, add, tag) != 0) {
- device_printf(sc->vtnet_dev,
- "cannot %s VLAN %d %s the host filter table\n",
- add ? "add" : "remove", tag,
- add ? "to" : "from");
- }
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
+ vtnet_exec_vlan_filter(sc, add, tag) != 0) {
+ device_printf(sc->vtnet_dev,
+ "cannot %s VLAN %d %s the host filter table\n",
+ add ? "add" : "remove", tag, add ? "to" : "from");
}
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
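
The shadow table stores one bit per VLAN ID in 32-bit words: word index tag >> 5, bit index tag & 0x1F. A tiny sketch of the same set/clear math (table size assumed to match the 4096 possible IDs):

#include <stdint.h>
#include <stdio.h>

#define VLAN_NWORDS	(4096 / 32)	/* one bit per possible VLAN ID */

static uint32_t table[VLAN_NWORDS];

static void
vlan_update(uint16_t tag, int add)
{
	int idx = (tag >> 5) & 0x7F;	/* word index, 0..127 */
	int bit = tag & 0x1F;		/* bit within that word */

	if (add)
		table[idx] |= 1u << bit;
	else
		table[idx] &= ~(1u << bit);
}

int
main(void)
{

	vlan_update(100, 1);
	printf("word %d = 0x%08x\n", 100 >> 5, table[100 >> 5]);
	return (0);
}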
static void
@@ -2531,7 +3379,7 @@ vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
if (ifp->if_softc != arg)
return;
- vtnet_set_vlan_filter(arg, 1, tag);
+ vtnet_update_vlan_filter(arg, 1, tag);
}
static void
@@ -2541,7 +3389,47 @@ vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
if (ifp->if_softc != arg)
return;
- vtnet_set_vlan_filter(arg, 0, tag);
+ vtnet_update_vlan_filter(arg, 0, tag);
+}
+
+static int
+vtnet_is_link_up(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+ uint16_t status;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
+ status = VIRTIO_NET_S_LINK_UP;
+ else
+ status = virtio_read_dev_config_2(dev,
+ offsetof(struct virtio_net_config, status));
+
+ return ((status & VIRTIO_NET_S_LINK_UP) != 0);
+}
+
+static void
+vtnet_update_link_status(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ int link;
+
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+ link = vtnet_is_link_up(sc);
+
+ /* Notify if the link status has changed. */
+ if (link != 0 && sc->vtnet_link_active == 0) {
+ sc->vtnet_link_active = 1;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else if (link == 0 && sc->vtnet_link_active != 0) {
+ sc->vtnet_link_active = 0;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ }
}
static int
@@ -2569,112 +3457,334 @@ vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if (vtnet_is_link_up(sc) != 0) {
ifmr->ifm_status |= IFM_ACTIVE;
ifmr->ifm_active |= VTNET_MEDIATYPE;
} else
ifmr->ifm_active |= IFM_NONE;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
static void
-vtnet_add_statistics(struct vtnet_softc *sc)
+vtnet_set_hwaddr(struct vtnet_softc *sc)
{
device_t dev;
- struct vtnet_statistics *stats;
- struct sysctl_ctx_list *ctx;
+
+ dev = sc->vtnet_dev;
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
+ if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
+ device_printf(dev, "unable to set MAC address\n");
+ } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
+ virtio_write_device_config(dev,
+ offsetof(struct virtio_net_config, mac),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+ }
+}
+
+static void
+vtnet_get_hwaddr(struct vtnet_softc *sc)
+{
+ device_t dev;
+
+ dev = sc->vtnet_dev;
+
+ if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
+ /*
+ * Generate a random locally administered unicast address.
+ *
+ * It would be nice to generate the same MAC address across
+ * reboots, but it seems all the hosts currently available
+ * support the MAC feature, so this isn't too important.
+ */
+ sc->vtnet_hwaddr[0] = 0xB2;
+ arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
+ vtnet_set_hwaddr(sc);
+ return;
+ }
+
+ virtio_read_device_config(dev, offsetof(struct virtio_net_config, mac),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+}
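
The 0xB2 first octet sets the locally administered bit (0x02) and leaves the multicast bit (0x01) clear, so the generated address is a valid random unicast MAC. A userland sketch of the same generation, assuming arc4random_buf(3) as found in FreeBSD libc:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>	/* arc4random_buf(3) on FreeBSD */

#define ETHER_ADDR_LEN	6

int
main(void)
{
	uint8_t mac[ETHER_ADDR_LEN];

	/* 0xB2: locally administered (0x02) set, multicast (0x01) clear. */
	mac[0] = 0xB2;
	arc4random_buf(&mac[1], ETHER_ADDR_LEN - 1);

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return (0);
}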
+
+static void
+vtnet_vlan_tag_remove(struct mbuf *m)
+{
+ struct ether_vlan_header *evh;
+
+ evh = mtod(m, struct ether_vlan_header *);
+ m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
+ m->m_flags |= M_VLANTAG;
+
+ /* Strip the 802.1Q header. */
+ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
+ ETHER_HDR_LEN - ETHER_TYPE_LEN);
+ m_adj(m, ETHER_VLAN_ENCAP_LEN);
+}
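
The bcopy() slides the 12 bytes of destination and source address forward over the 4-byte 802.1Q shim, and m_adj() then trims the now-dead leading bytes. A flat-buffer sketch of the same strip (illustrative, not mbuf code):

#include <stdio.h>
#include <string.h>

#define ETHER_HDR_LEN		14
#define ETHER_TYPE_LEN		2
#define ETHER_VLAN_ENCAP_LEN	4

/* Strip the 802.1Q shim from a flat frame; returns the new frame start. */
static unsigned char *
vlan_strip(unsigned char *frame, unsigned short *tag)
{

	/* The tag control info sits at bytes 14-15 of the VLAN header. */
	*tag = (frame[14] << 8) | frame[15];

	/* Slide dst + src addresses (12 bytes) forward over the shim. */
	memmove(frame + ETHER_VLAN_ENCAP_LEN, frame,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	return (frame + ETHER_VLAN_ENCAP_LEN);
}

int
main(void)
{
	unsigned char f[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
	    0x81, 0x00, 0x00, 0x64, 0x08, 0x00 };	/* tag 100, IPv4 */
	unsigned short tag;
	unsigned char *p = vlan_strip(f, &tag);

	printf("tag %u, inner type 0x%02x%02x\n", tag, p[12], p[13]);
	return (0);
}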
+
+static void
+vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
+{
+ struct sysctl_oid *node;
+ struct sysctl_oid_list *list;
+ struct vtnet_rxq_stats *stats;
+ char namebuf[16];
+
+ snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
+ node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "Receive Queue");
+ list = SYSCTL_CHILDREN(node);
+
+ stats = &rxq->vtnrx_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
+ &stats->vrxs_ipackets, "Receive packets");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
+ &stats->vrxs_ibytes, "Receive bytes");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
+ &stats->vrxs_iqdrops, "Receive drops");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
+ &stats->vrxs_ierrors, "Receive errors");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ &stats->vrxs_csum, "Receive checksum offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
+ &stats->vrxs_csum_failed, "Receive checksum offload failed");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ &stats->vrxs_rescheduled,
+ "Receive interrupt handler rescheduled");
+}
+
+static void
+vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_txq *txq)
+{
+ struct sysctl_oid *node;
+ struct sysctl_oid_list *list;
+ struct vtnet_txq_stats *stats;
+ char namebuf[16];
+
+ snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
+ node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "Transmit Queue");
+ list = SYSCTL_CHILDREN(node);
+
+ stats = &txq->vtntx_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
+ &stats->vtxs_opackets, "Transmit packets");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
+ &stats->vtxs_obytes, "Transmit bytes");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
+ &stats->vtxs_omcasts, "Transmit multicasts");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ &stats->vtxs_csum, "Transmit checksum offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
+ &stats->vtxs_tso, "Transmit segmentation offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD,
+ &stats->vtxs_collapsed, "Transmit mbufs collapsed");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ &stats->vtxs_rescheduled,
+ "Transmit interrupt handler rescheduled");
+}
+
+static void
+vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct sysctl_ctx_list *ctx;
struct sysctl_oid *tree;
struct sysctl_oid_list *child;
+ int i;
dev = sc->vtnet_dev;
- stats = &sc->vtnet_stats;
ctx = device_get_sysctl_ctx(dev);
tree = device_get_sysctl_tree(dev);
child = SYSCTL_CHILDREN(tree);
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed",
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
+ vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
+ }
+}
+
+static void
+vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_softc *sc)
+{
+ struct vtnet_statistics *stats;
+
+ stats = &sc->vtnet_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
CTLFLAG_RD, &stats->mbuf_alloc_failed,
"Mbuf cluster allocation failures");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
CTLFLAG_RD, &stats->rx_frame_too_large,
"Received frame larger than the mbuf chain");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
CTLFLAG_RD, &stats->rx_enq_replacement_failed,
"Enqueuing the replacement receive mbuf failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
CTLFLAG_RD, &stats->rx_mergeable_failed,
"Mergeable buffers receive failures");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
"Received checksum offloaded buffer with unsupported "
"Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start",
- CTLFLAG_RD, &stats->rx_csum_bad_start,
- "Received checksum offloaded buffer with incorrect start offset");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
"Received checksum offloaded buffer with incorrect IP protocol");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
CTLFLAG_RD, &stats->rx_csum_bad_offset,
"Received checksum offloaded buffer with incorrect offset");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
+ CTLFLAG_RD, &stats->rx_csum_bad_proto,
+ "Received checksum offloaded buffer with incorrect protocol");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
CTLFLAG_RD, &stats->rx_csum_failed,
"Received buffer checksum offload failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_offloaded",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
CTLFLAG_RD, &stats->rx_csum_offloaded,
"Received buffer checksum offload succeeded");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
CTLFLAG_RD, &stats->rx_task_rescheduled,
"Times the receive interrupt task rescheduled itself");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded",
- CTLFLAG_RD, &stats->tx_csum_offloaded,
- "Offloaded checksum of transmitted buffer");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded",
- CTLFLAG_RD, &stats->tx_tso_offloaded,
- "Segmentation offload of transmitted buffer");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
"Aborted transmit of checksum offloaded buffer with unknown "
"Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
"Aborted transmit of TSO buffer with unknown Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
+ CTLFLAG_RD, &stats->tx_tso_not_tcp,
+ "Aborted transmit of TSO buffer with non TCP protocol");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
+ CTLFLAG_RD, &stats->tx_csum_offloaded,
+ "Offloaded checksum of transmitted buffer");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
+ CTLFLAG_RD, &stats->tx_tso_offloaded,
+ "Segmentation offload of transmitted buffer");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
CTLFLAG_RD, &stats->tx_task_rescheduled,
"Times the transmit interrupt task rescheduled itself");
}
+static void
+vtnet_setup_sysctl(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *tree;
+ struct sysctl_oid_list *child;
+
+ dev = sc->vtnet_dev;
+ ctx = device_get_sysctl_ctx(dev);
+ tree = device_get_sysctl_tree(dev);
+ child = SYSCTL_CHILDREN(tree);
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
+ CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
+ "Maximum number of supported virtqueue pairs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
+ CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
+ "Number of active virtqueue pairs");
+
+ vtnet_setup_stat_sysctl(ctx, child, sc);
+}
+
static int
-vtnet_enable_rx_intr(struct vtnet_softc *sc)
+vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{
- return (virtqueue_enable_intr(sc->vtnet_rx_vq));
+ return (virtqueue_enable_intr(rxq->vtnrx_vq));
}
static void
-vtnet_disable_rx_intr(struct vtnet_softc *sc)
+vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{
- virtqueue_disable_intr(sc->vtnet_rx_vq);
+ virtqueue_disable_intr(rxq->vtnrx_vq);
}
static int
-vtnet_enable_tx_intr(struct vtnet_softc *sc)
+vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
-#ifdef VTNET_TX_INTR_MODERATION
- return (0);
-#else
- return (virtqueue_enable_intr(sc->vtnet_tx_vq));
-#endif
+ return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+}
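
Rather than leaving transmit interrupts enabled, the driver now postpones them: where VIRTIO_RING_F_EVENT_IDX is available, virtqueue_postpone_intr() pushes the used-event index ahead so the host interrupts once per batch, replacing the old VTNET_TX_INTR_MODERATION compile-time behavior removed above. The event-index test itself, as given in the virtio specification, shown as a sketch:

#include <stdint.h>
#include <stdio.h>

/*
 * Event-index test from the virtio specification: interrupt only when
 * the new used index crosses event_idx. The unsigned 16-bit wrap-around
 * arithmetic keeps the comparison correct across index overflow.
 */
static int
vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
{

	return ((uint16_t)(new_idx - event_idx - 1) <
	    (uint16_t)(new_idx - old_idx));
}

int
main(void)
{

	/* Entries 6..10 completed; guest asked for an event after 8. */
	printf("%d\n", vring_need_event(8, 10, 5));	/* 1: interrupt */
	printf("%d\n", vring_need_event(12, 10, 5));	/* 0: postponed */
	return (0);
}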
+
+static void
+vtnet_txq_disable_intr(struct vtnet_txq *txq)
+{
+
+ virtqueue_disable_intr(txq->vtntx_vq);
+}
+
+static void
+vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
}
static void
-vtnet_disable_tx_intr(struct vtnet_softc *sc)
+vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
+}
+
+static void
+vtnet_enable_interrupts(struct vtnet_softc *sc)
+{
+
+ vtnet_enable_rx_interrupts(sc);
+ vtnet_enable_tx_interrupts(sc);
+}
+
+static void
+vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
+}
+
+static void
+vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
+}
+
+static void
+vtnet_disable_interrupts(struct vtnet_softc *sc)
+{
+
+ vtnet_disable_rx_interrupts(sc);
+ vtnet_disable_tx_interrupts(sc);
+}
+
+static int
+vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
+{
+ char path[64];
+
+ snprintf(path, sizeof(path),
+ "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
+ TUNABLE_INT_FETCH(path, &def);
- virtqueue_disable_intr(sc->vtnet_tx_vq);
+ return (def);
}
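
This helper backs the per-device hw.vtnet.X.* tunables added to the manual page: the per-unit path is consulted first, and def (typically the global tunable's value) passes through untouched if the loader never set the per-unit knob. A userland sketch of the same fallback pattern, with getenv(3) standing in for TUNABLE_INT_FETCH():

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical stand-in: getenv(3) plays the role of the loader's
 * kernel environment consulted by TUNABLE_INT_FETCH().
 */
static int
tunable_int(int unit, const char *knob, int def)
{
	char path[64];
	const char *val;

	snprintf(path, sizeof(path), "hw.vtnet.%d.%s", unit, knob);
	if ((val = getenv(path)) != NULL)
		def = atoi(val);
	return (def);
}

int
main(void)
{

	/* Falls back to the supplied default unless hw.vtnet.0.mq_disable
	 * is present in the environment. */
	printf("mq_disable = %d\n", tunable_int(0, "mq_disable", 0));
	return (0);
}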
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index d870436..5921103 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -29,83 +29,165 @@
#ifndef _IF_VTNETVAR_H
#define _IF_VTNETVAR_H
+struct vtnet_softc;
+
struct vtnet_statistics {
- unsigned long mbuf_alloc_failed;
-
- unsigned long rx_frame_too_large;
- unsigned long rx_enq_replacement_failed;
- unsigned long rx_mergeable_failed;
- unsigned long rx_csum_bad_ethtype;
- unsigned long rx_csum_bad_start;
- unsigned long rx_csum_bad_ipproto;
- unsigned long rx_csum_bad_offset;
- unsigned long rx_csum_failed;
- unsigned long rx_csum_offloaded;
- unsigned long rx_task_rescheduled;
-
- unsigned long tx_csum_offloaded;
- unsigned long tx_tso_offloaded;
- unsigned long tx_csum_bad_ethtype;
- unsigned long tx_tso_bad_ethtype;
- unsigned long tx_task_rescheduled;
+ uint64_t mbuf_alloc_failed;
+
+ uint64_t rx_frame_too_large;
+ uint64_t rx_enq_replacement_failed;
+ uint64_t rx_mergeable_failed;
+ uint64_t rx_csum_bad_ethtype;
+ uint64_t rx_csum_bad_ipproto;
+ uint64_t rx_csum_bad_offset;
+ uint64_t rx_csum_bad_proto;
+ uint64_t tx_csum_bad_ethtype;
+ uint64_t tx_tso_bad_ethtype;
+ uint64_t tx_tso_not_tcp;
+
+ /*
+ * These are accumulated from each Rx/Tx queue.
+ */
+ uint64_t rx_csum_failed;
+ uint64_t rx_csum_offloaded;
+ uint64_t rx_task_rescheduled;
+ uint64_t tx_csum_offloaded;
+ uint64_t tx_tso_offloaded;
+ uint64_t tx_task_rescheduled;
+};
+
+struct vtnet_rxq_stats {
+ uint64_t vrxs_ipackets; /* if_ipackets */
+ uint64_t vrxs_ibytes; /* if_ibytes */
+ uint64_t vrxs_iqdrops; /* if_iqdrops */
+ uint64_t vrxs_ierrors; /* if_ierrors */
+ uint64_t vrxs_csum;
+ uint64_t vrxs_csum_failed;
+ uint64_t vrxs_rescheduled;
};
+struct vtnet_rxq {
+ struct mtx vtnrx_mtx;
+ struct vtnet_softc *vtnrx_sc;
+ struct virtqueue *vtnrx_vq;
+ int vtnrx_id;
+ int vtnrx_process_limit;
+ struct vtnet_rxq_stats vtnrx_stats;
+ struct taskqueue *vtnrx_tq;
+ struct task vtnrx_intrtask;
+ char vtnrx_name[16];
+} __aligned(CACHE_LINE_SIZE);
+
+#define VTNET_RXQ_LOCK(_rxq) mtx_lock(&(_rxq)->vtnrx_mtx)
+#define VTNET_RXQ_UNLOCK(_rxq) mtx_unlock(&(_rxq)->vtnrx_mtx)
+#define VTNET_RXQ_LOCK_ASSERT(_rxq) \
+ mtx_assert(&(_rxq)->vtnrx_mtx, MA_OWNED)
+#define VTNET_RXQ_LOCK_ASSERT_NOTOWNED(_rxq) \
+ mtx_assert(&(_rxq)->vtnrx_mtx, MA_NOTOWNED)
+
+struct vtnet_txq_stats {
+ uint64_t vtxs_opackets; /* if_opackets */
+ uint64_t vtxs_obytes; /* if_obytes */
+ uint64_t vtxs_omcasts; /* if_omcasts */
+ uint64_t vtxs_csum;
+ uint64_t vtxs_tso;
+ uint64_t vtxs_collapsed;
+ uint64_t vtxs_rescheduled;
+};
+
+struct vtnet_txq {
+ struct mtx vtntx_mtx;
+ struct vtnet_softc *vtntx_sc;
+ struct virtqueue *vtntx_vq;
+#ifndef VTNET_LEGACY_TX
+ struct buf_ring *vtntx_br;
+#endif
+ int vtntx_id;
+ int vtntx_watchdog;
+ struct vtnet_txq_stats vtntx_stats;
+ struct taskqueue *vtntx_tq;
+ struct task vtntx_intrtask;
+#ifndef VTNET_LEGACY_TX
+ struct task vtntx_defrtask;
+#endif
+ char vtntx_name[16];
+} __aligned(CACHE_LINE_SIZE);
+
+#define VTNET_TXQ_LOCK(_txq) mtx_lock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_TRYLOCK(_txq) mtx_trylock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_UNLOCK(_txq) mtx_unlock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_LOCK_ASSERT(_txq) \
+ mtx_assert(&(_txq)->vtntx_mtx, MA_OWNED)
+#define VTNET_TXQ_LOCK_ASSERT_NOTOWNED(_txq) \
+ mtx_assert(&(_txq)->vtntx_mtx, MA_NOTOWNED)
+
struct vtnet_softc {
device_t vtnet_dev;
struct ifnet *vtnet_ifp;
- struct mtx vtnet_mtx;
+ struct vtnet_rxq *vtnet_rxqs;
+ struct vtnet_txq *vtnet_txqs;
uint32_t vtnet_flags;
-#define VTNET_FLAG_LINK 0x0001
-#define VTNET_FLAG_SUSPENDED 0x0002
+#define VTNET_FLAG_SUSPENDED 0x0001
+#define VTNET_FLAG_MAC 0x0002
#define VTNET_FLAG_CTRL_VQ 0x0004
#define VTNET_FLAG_CTRL_RX 0x0008
-#define VTNET_FLAG_VLAN_FILTER 0x0010
-#define VTNET_FLAG_TSO_ECN 0x0020
-#define VTNET_FLAG_MRG_RXBUFS 0x0040
-#define VTNET_FLAG_LRO_NOMRG 0x0080
-
- struct virtqueue *vtnet_rx_vq;
- struct virtqueue *vtnet_tx_vq;
- struct virtqueue *vtnet_ctrl_vq;
+#define VTNET_FLAG_CTRL_MAC 0x0010
+#define VTNET_FLAG_VLAN_FILTER 0x0020
+#define VTNET_FLAG_TSO_ECN 0x0040
+#define VTNET_FLAG_MRG_RXBUFS 0x0080
+#define VTNET_FLAG_LRO_NOMRG 0x0100
+#define VTNET_FLAG_MULTIQ 0x0200
+ int vtnet_link_active;
int vtnet_hdr_size;
- int vtnet_tx_size;
- int vtnet_rx_size;
int vtnet_rx_process_limit;
- int vtnet_rx_mbuf_size;
- int vtnet_rx_mbuf_count;
+ int vtnet_rx_nmbufs;
+ int vtnet_rx_clsize;
+ int vtnet_rx_new_clsize;
int vtnet_if_flags;
- int vtnet_watchdog_timer;
- uint64_t vtnet_features;
+ int vtnet_act_vq_pairs;
+ int vtnet_max_vq_pairs;
- struct vtnet_statistics vtnet_stats;
+ struct virtqueue *vtnet_ctrl_vq;
+ struct vtnet_mac_filter *vtnet_mac_filter;
+ uint32_t *vtnet_vlan_filter;
+ uint64_t vtnet_features;
+ struct vtnet_statistics vtnet_stats;
struct callout vtnet_tick_ch;
-
+ struct ifmedia vtnet_media;
eventhandler_tag vtnet_vlan_attach;
eventhandler_tag vtnet_vlan_detach;
- struct ifmedia vtnet_media;
- /*
- * Fake media type; the host does not provide us with
- * any real media information.
- */
-#define VTNET_MEDIATYPE (IFM_ETHER | IFM_1000_T | IFM_FDX)
+ struct mtx vtnet_mtx;
+ char vtnet_mtx_name[16];
char vtnet_hwaddr[ETHER_ADDR_LEN];
+};
- struct vtnet_mac_filter *vtnet_mac_filter;
- /*
- * During reset, the host's VLAN filtering table is lost. The
- * array below is used to restore all the VLANs configured on
- * this interface after a reset.
- */
-#define VTNET_VLAN_SHADOW_SIZE (4096 / 32)
- int vtnet_nvlans;
- uint32_t vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE];
+/*
+ * Upper bound on the number of queue pairs the driver will
+ * configure automatically.
+ */
+#define VTNET_MAX_QUEUE_PAIRS 8
- char vtnet_mtx_name[16];
-};
+/*
+ * Additional completed entries can appear in a virtqueue before
+ * interrupts can be reenabled, so the handler retries a bounded number
+ * of times. This is the retry count before the remaining completed
+ * entries are deferred to the taskqueue.
+ */
+#define VTNET_INTR_DISABLE_RETRIES 4
+
+/*
+ * Fake the media type. The host does not provide us with any real media
+ * information.
+ */
+#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX)
+
+/*
+ * Number of words to allocate for the VLAN shadow table. There is one
+ * bit for each VLAN.
+ */
+#define VTNET_VLAN_FILTER_NWORDS (4096 / 32)
/*
* When mergeable buffers are not negotiated, the vtnet_rx_header structure
@@ -161,8 +243,12 @@ struct vtnet_mac_filter {
*/
CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
-#define VTNET_WATCHDOG_TIMEOUT 5
+#define VTNET_TX_TIMEOUT 5
#define VTNET_CSUM_OFFLOAD (CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define VTNET_CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+
+#define VTNET_CSUM_ALL_OFFLOAD \
+ (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO)
/* Features desired/implemented by this driver. */
#define VTNET_FEATURES \
@@ -170,8 +256,10 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
VIRTIO_NET_F_STATUS | \
VIRTIO_NET_F_CTRL_VQ | \
VIRTIO_NET_F_CTRL_RX | \
+ VIRTIO_NET_F_CTRL_MAC_ADDR | \
VIRTIO_NET_F_CTRL_VLAN | \
VIRTIO_NET_F_CSUM | \
+ VIRTIO_NET_F_GSO | \
VIRTIO_NET_F_HOST_TSO4 | \
VIRTIO_NET_F_HOST_TSO6 | \
VIRTIO_NET_F_HOST_ECN | \
@@ -180,9 +268,18 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
VIRTIO_NET_F_GUEST_TSO6 | \
VIRTIO_NET_F_GUEST_ECN | \
VIRTIO_NET_F_MRG_RXBUF | \
+ VIRTIO_NET_F_MQ | \
+ VIRTIO_RING_F_EVENT_IDX | \
VIRTIO_RING_F_INDIRECT_DESC)
/*
+ * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send frames
+ * larger than 1514 bytes to the host.
+ */
+#define VTNET_TSO_FEATURES (VIRTIO_NET_F_GSO | VIRTIO_NET_F_HOST_TSO4 | \
+ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN)
+
+/*
* The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
* frames larger than 1514 bytes. We do not yet support software LRO
* via tcp_lro_rx().
@@ -209,27 +306,34 @@ CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
/*
+ * Number of slots in the Tx bufrings. This value matches the size
+ * used by most other multiqueue drivers.
+ */
+#define VTNET_DEFAULT_BUFRING_SIZE 4096
+
+/*
* Determine how many mbufs are in each receive buffer. For LRO without
* mergeable descriptors, we must allocate an mbuf chain large enough to
* hold both the vtnet_rx_header and the maximum receivable data.
*/
-#define VTNET_NEEDED_RX_MBUFS(_sc) \
+#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \
((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 : \
howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \
- (_sc)->vtnet_rx_mbuf_size)
+ (_clsize))
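
Worked numbers: with LRO_NOMRG negotiated, the mbuf chain must cover the receive header plus the largest receivable frame, so howmany() rounds that total up to whole clusters. A quick sketch of the arithmetic, assuming a 24-byte header and 65550 for VTNET_MAX_RX_SIZE (both values assumed for illustration):

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	int hdr = 24;		/* assumed sizeof(struct vtnet_rx_header) */
	int maxrx = 65550;	/* assumed VTNET_MAX_RX_SIZE */
	int clsize;

	for (clsize = 2048; clsize <= 16384; clsize *= 2)
		printf("%5d-byte clusters -> %2d mbufs per rx buffer\n",
		    clsize, howmany(hdr + maxrx, clsize));
	return (0);
}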
-#define VTNET_MTX(_sc) &(_sc)->vtnet_mtx
-#define VTNET_LOCK(_sc) mtx_lock(VTNET_MTX((_sc)))
-#define VTNET_UNLOCK(_sc) mtx_unlock(VTNET_MTX((_sc)))
-#define VTNET_LOCK_DESTROY(_sc) mtx_destroy(VTNET_MTX((_sc)))
-#define VTNET_LOCK_ASSERT(_sc) mtx_assert(VTNET_MTX((_sc)), MA_OWNED)
-#define VTNET_LOCK_ASSERT_NOTOWNED(_sc) \
- mtx_assert(VTNET_MTX((_sc)), MA_NOTOWNED)
+#define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx
+#define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_UNLOCK(_sc) mtx_unlock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_DESTROY(_sc) mtx_destroy(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_ASSERT(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_OWNED)
+#define VTNET_CORE_LOCK_ASSERT_NOTOWNED(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_NOTOWNED)
-#define VTNET_LOCK_INIT(_sc) do { \
+#define VTNET_CORE_LOCK_INIT(_sc) do { \
snprintf((_sc)->vtnet_mtx_name, sizeof((_sc)->vtnet_mtx_name), \
"%s", device_get_nameunit((_sc)->vtnet_dev)); \
- mtx_init(VTNET_MTX((_sc)), (_sc)->vtnet_mtx_name, \
+ mtx_init(VTNET_CORE_MTX((_sc)), (_sc)->vtnet_mtx_name, \
"VTNET Core Lock", MTX_DEF); \
} while (0)
diff --git a/sys/modules/virtio/network/Makefile b/sys/modules/virtio/network/Makefile
index 8463309c..f124d99 100644
--- a/sys/modules/virtio/network/Makefile
+++ b/sys/modules/virtio/network/Makefile
@@ -23,14 +23,29 @@
# SUCH DAMAGE.
#
+.include <bsd.own.mk>
+
.PATH: ${.CURDIR}/../../../dev/virtio/network
KMOD= if_vtnet
SRCS= if_vtnet.c
SRCS+= virtio_bus_if.h virtio_if.h
SRCS+= bus_if.h device_if.h
+SRCS+= opt_inet.h opt_inet6.h
MFILES= kern/bus_if.m kern/device_if.m \
dev/virtio/virtio_bus_if.m dev/virtio/virtio_if.m
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
.include <bsd.kmod.mk>