author     bryanv <bryanv@FreeBSD.org>  2013-09-01 04:33:47 +0000
committer  bryanv <bryanv@FreeBSD.org>  2013-09-01 04:33:47 +0000
commit     c4011595924e79ff02f7f896f09c0b57011ce00d
tree       3e1b266e931116c1d854704fde099d838a45bb58
parent     4174a823010710068b29ea7f013486fc98f3ffbe
Import multiqueue VirtIO net driver from my user/bryanv/vtnetmq branch
This is a significant rewrite of much of the previous driver; it also includes miscellaneous cleanup throughout and support for a few other minor features.
-rw-r--r--  share/man/man4/vtnet.4               |   16
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c    | 3630
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h |  236
-rw-r--r--  sys/modules/virtio/network/Makefile  |   15
4 files changed, 2571 insertions, 1326 deletions
diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4
index 8d4d202..c7b2189 100644
--- a/share/man/man4/vtnet.4
+++ b/share/man/man4/vtnet.4
@@ -69,14 +69,30 @@ prompt before booting the kernel or stored in
.Xr loader.conf 5 .
.Bl -tag -width "xxxxxx"
.It Va hw.vtnet.csum_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .csum_disable
This tunable disables receive and send checksum offload.
The default value is 0.
.It Va hw.vtnet.tso_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .tso_disable
This tunable disables TSO.
The default value is 0.
.It Va hw.vtnet.lro_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .lro_disable
This tunable disables LRO.
The default value is 0.
+.It Va hw.vtnet.mq_disable
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_disable
+This tunable disables multiqueue.
+The default value is 0.
+.It Va hw.vtnet.mq_max_pairs
+.It Va hw.vtnet. Ns Ar X Ns Va .mq_max_pairs
+This tunable sets the maximum number of transmit and receive queue pairs.
+Multiple queues are only supported when the Multiqueue feature is negotiated.
+This driver supports a maximum of 8 queue pairs.
+The number of queue pairs used is the smallest of: the maximum supported
+by the driver and by the hypervisor, the number of CPUs present in the
+guest, and this tunable if not zero (see the sketch after this list).
+The default value is 0.
.El
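
Expressed as code, the selection rule above is a simple clamp over the four
limits. A minimal sketch only, borrowing the driver-side names
(VTNET_MAX_QUEUE_PAIRS, mp_ncpus) used later in this commit;
vtnet_setup_features() performs the real computation:

	pairs = host_max_pairs;                 /* VirtIO config: max_virtqueue_pairs */
	if (pairs > VTNET_MAX_QUEUE_PAIRS)      /* driver limit of 8 */
		pairs = VTNET_MAX_QUEUE_PAIRS;
	if (pairs > mp_ncpus)                   /* CPUs present in the guest */
		pairs = mp_ncpus;
	if (mq_max_pairs != 0 && pairs > mq_max_pairs)
		pairs = mq_max_pairs;           /* this tunable, if not zero */
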
.Sh SEE ALSO
.Xr arp 4 ,
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 89604d1..f757394 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -29,10 +29,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include "opt_device_polling.h"
-#endif
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -46,6 +42,9 @@ __FBSDID("$FreeBSD$");
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/taskqueue.h>
+#include <sys/smp.h>
+#include <machine/smp.h>
#include <vm/uma.h>
@@ -63,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/sctp.h>
@@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$");
#include "virtio_if.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
static int vtnet_modevent(module_t, int, void *);
static int vtnet_probe(device_t);
@@ -87,82 +90,139 @@ static int vtnet_detach(device_t);
static int vtnet_suspend(device_t);
static int vtnet_resume(device_t);
static int vtnet_shutdown(device_t);
+static int vtnet_attach_completed(device_t);
static int vtnet_config_change(device_t);
static void vtnet_negotiate_features(struct vtnet_softc *);
+static void vtnet_setup_features(struct vtnet_softc *);
+static int vtnet_init_rxq(struct vtnet_softc *, int);
+static int vtnet_init_txq(struct vtnet_softc *, int);
+static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
+static void vtnet_free_rxtx_queues(struct vtnet_softc *);
+static int vtnet_alloc_rx_filters(struct vtnet_softc *);
+static void vtnet_free_rx_filters(struct vtnet_softc *);
static int vtnet_alloc_virtqueues(struct vtnet_softc *);
-static void vtnet_get_hwaddr(struct vtnet_softc *);
-static void vtnet_set_hwaddr(struct vtnet_softc *);
-static int vtnet_is_link_up(struct vtnet_softc *);
-static void vtnet_update_link_status(struct vtnet_softc *);
-static void vtnet_watchdog(struct vtnet_softc *);
+static int vtnet_setup_interface(struct vtnet_softc *);
static int vtnet_change_mtu(struct vtnet_softc *, int);
static int vtnet_ioctl(struct ifnet *, u_long, caddr_t);
-static int vtnet_init_rx_vq(struct vtnet_softc *);
-static void vtnet_free_rx_mbufs(struct vtnet_softc *);
-static void vtnet_free_tx_mbufs(struct vtnet_softc *);
-static void vtnet_free_ctrl_vq(struct vtnet_softc *);
-
-#ifdef DEVICE_POLLING
-static poll_handler_t vtnet_poll;
-#endif
-
-static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
- struct mbuf **);
-static int vtnet_replace_rxbuf(struct vtnet_softc *,
+static int vtnet_rxq_populate(struct vtnet_rxq *);
+static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
+static struct mbuf *
+		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
+static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
struct mbuf *, int);
-static int vtnet_newbuf(struct vtnet_softc *);
-static void vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
-static void vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
-static int vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
-static void vtnet_vlan_tag_remove(struct mbuf *);
-static int vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
+static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
+static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
+static int vtnet_rxq_new_buf(struct vtnet_rxq *);
+static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
+ struct virtio_net_hdr *);
+static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
+static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
+static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
+static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
-static int vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
-static int vtnet_rxeof(struct vtnet_softc *, int, int *);
+static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
+static void vtnet_rxq_tq_intr(void *, int);
-static void vtnet_txeof(struct vtnet_softc *);
-static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
+static void vtnet_txq_free_mbufs(struct vtnet_txq *);
+static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
+ int *, int *, int *);
+static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
+ int, struct virtio_net_hdr *);
+static struct mbuf *
+ vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
struct virtio_net_hdr *);
-static int vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
+static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
struct vtnet_tx_header *);
-static int vtnet_encap(struct vtnet_softc *, struct mbuf **);
-static void vtnet_start_locked(struct ifnet *);
+static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
+#ifdef VTNET_LEGACY_TX
+static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
static void vtnet_start(struct ifnet *);
-static void vtnet_tick(void *);
+#else
+static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
+static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
+static void vtnet_txq_tq_deferred(void *, int);
+#endif
+static void vtnet_txq_tq_intr(void *, int);
+static void vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
+static void vtnet_tx_start_all(struct vtnet_softc *);
+
+#ifndef VTNET_LEGACY_TX
+static void vtnet_qflush(struct ifnet *);
+#endif
+
+static int vtnet_watchdog(struct vtnet_txq *);
+static void vtnet_rxq_accum_stats(struct vtnet_rxq *,
+ struct vtnet_rxq_stats *);
+static void vtnet_txq_accum_stats(struct vtnet_txq *,
+ struct vtnet_txq_stats *);
+static void vtnet_accumulate_stats(struct vtnet_softc *);
+static void vtnet_tick(void *);
+static void vtnet_start_taskqueues(struct vtnet_softc *);
+static void vtnet_free_taskqueues(struct vtnet_softc *);
+static void vtnet_drain_taskqueues(struct vtnet_softc *);
+
+static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
+static void vtnet_stop_rendezvous(struct vtnet_softc *);
static void vtnet_stop(struct vtnet_softc *);
+static int vtnet_virtio_reinit(struct vtnet_softc *);
+static void vtnet_init_rx_filters(struct vtnet_softc *);
+static int vtnet_init_rx_queues(struct vtnet_softc *);
+static int vtnet_init_tx_queues(struct vtnet_softc *);
+static int vtnet_init_rxtx_queues(struct vtnet_softc *);
+static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
static int vtnet_reinit(struct vtnet_softc *);
static void vtnet_init_locked(struct vtnet_softc *);
static void vtnet_init(void *);
+static void vtnet_free_ctrl_vq(struct vtnet_softc *);
static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
struct sglist *, int, int);
-
-static void vtnet_rx_filter(struct vtnet_softc *sc);
+static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
+static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
static int vtnet_set_promisc(struct vtnet_softc *, int);
static int vtnet_set_allmulti(struct vtnet_softc *, int);
+static void vtnet_attach_disable_promisc(struct vtnet_softc *);
+static void vtnet_rx_filter(struct vtnet_softc *);
static void vtnet_rx_filter_mac(struct vtnet_softc *);
-
static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_rx_filter_vlan(struct vtnet_softc *);
-static void vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t);
+static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void vtnet_register_vlan(void *, struct ifnet *, uint16_t);
static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
+static int vtnet_is_link_up(struct vtnet_softc *);
+static void vtnet_update_link_status(struct vtnet_softc *);
static int vtnet_ifmedia_upd(struct ifnet *);
static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static void vtnet_get_hwaddr(struct vtnet_softc *);
+static void vtnet_set_hwaddr(struct vtnet_softc *);
+static void vtnet_vlan_tag_remove(struct mbuf *);
-static void vtnet_add_statistics(struct vtnet_softc *);
-
-static int vtnet_enable_rx_intr(struct vtnet_softc *);
-static int vtnet_enable_tx_intr(struct vtnet_softc *);
-static void vtnet_disable_rx_intr(struct vtnet_softc *);
-static void vtnet_disable_tx_intr(struct vtnet_softc *);
+static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_rxq *);
+static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, struct vtnet_txq *);
+static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
+static void vtnet_setup_sysctl(struct vtnet_softc *);
+
+static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
+static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
+static int vtnet_txq_enable_intr(struct vtnet_txq *);
+static void vtnet_txq_disable_intr(struct vtnet_txq *);
+static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
+static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
+static void vtnet_enable_interrupts(struct vtnet_softc *);
+static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
+static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
+static void vtnet_disable_interrupts(struct vtnet_softc *);
+
+static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
/* Tunables. */
static int vtnet_csum_disable = 0;
@@ -171,16 +231,25 @@ static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
+static int vtnet_mq_disable = 0;
+TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
+static int vtnet_mq_max_pairs = 0;
+TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
+static int vtnet_rx_process_limit = 512;
+TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
/*
- * Reducing the number of transmit completed interrupts can
- * improve performance. To do so, the define below keeps the
- * Tx vq interrupt disabled and adds calls to vtnet_txeof()
- * in the start and watchdog paths. The price to pay for this
- * is the m_free'ing of transmitted mbufs may be delayed until
- * the watchdog fires.
+ * Reducing the number of transmit completed interrupts can improve
+ * performance. To do so, the define below keeps the Tx vq interrupt
+ * disabled and adds calls to vtnet_txeof() in the start and watchdog
+ * paths. The price to pay for this is the m_free'ing of transmitted
+ * mbufs may be delayed until the watchdog fires.
+ *
+ * BMV: Reintroduce this later as a run-time option, if it makes
+ * sense after the EVENT_IDX feature is supported.
+ *
+ * #define VTNET_TX_INTR_MODERATION
*/
-#define VTNET_TX_INTR_MODERATION
static uma_zone_t vtnet_tx_header_zone;
@@ -203,21 +272,25 @@ static struct virtio_feature_desc vtnet_feature_desc[] = {
{ VIRTIO_NET_F_CTRL_RX, "RxMode" },
{ VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" },
{ VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" },
+ { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
+ { VIRTIO_NET_F_MQ, "Multiqueue" },
+ { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" },
{ 0, NULL }
};
static device_method_t vtnet_methods[] = {
/* Device methods. */
- DEVMETHOD(device_probe, vtnet_probe),
- DEVMETHOD(device_attach, vtnet_attach),
- DEVMETHOD(device_detach, vtnet_detach),
- DEVMETHOD(device_suspend, vtnet_suspend),
- DEVMETHOD(device_resume, vtnet_resume),
- DEVMETHOD(device_shutdown, vtnet_shutdown),
+ DEVMETHOD(device_probe, vtnet_probe),
+ DEVMETHOD(device_attach, vtnet_attach),
+ DEVMETHOD(device_detach, vtnet_detach),
+ DEVMETHOD(device_suspend, vtnet_suspend),
+ DEVMETHOD(device_resume, vtnet_resume),
+ DEVMETHOD(device_shutdown, vtnet_shutdown),
/* VirtIO methods. */
- DEVMETHOD(virtio_config_change, vtnet_config_change),
+ DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
+ DEVMETHOD(virtio_config_change, vtnet_config_change),
DEVMETHOD_END
};
@@ -282,56 +355,31 @@ static int
vtnet_attach(device_t dev)
{
struct vtnet_softc *sc;
- struct ifnet *ifp;
- int tx_size, error;
+ int error;
sc = device_get_softc(dev);
sc->vtnet_dev = dev;
- VTNET_LOCK_INIT(sc);
- callout_init_mtx(&sc->vtnet_tick_ch, VTNET_MTX(sc), 0);
-
- ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
- vtnet_ifmedia_sts);
- ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
- ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
-
- vtnet_add_statistics(sc);
-
+ /* Register our feature descriptions. */
virtio_set_feature_desc(dev, vtnet_feature_desc);
- vtnet_negotiate_features(sc);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
- sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
- sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- } else
- sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
- sc->vtnet_rx_mbuf_size = MCLBYTES;
- sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
- sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) {
- sc->vtnet_mac_filter = malloc(
- sizeof(struct vtnet_mac_filter), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (sc->vtnet_mac_filter == NULL) {
- device_printf(dev,
- "cannot allocate mac filter table\n");
- error = ENOMEM;
- goto fail;
- }
+ VTNET_CORE_LOCK_INIT(sc);
+ callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
- sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
- }
+ vtnet_setup_sysctl(sc);
+ vtnet_setup_features(sc);
- if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
- sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+ error = vtnet_alloc_rx_filters(sc);
+ if (error) {
+ device_printf(dev, "cannot allocate Rx filters\n");
+ goto fail;
}
- vtnet_get_hwaddr(sc);
+ error = vtnet_alloc_rxtx_queues(sc);
+ if (error) {
+ device_printf(dev, "cannot allocate queues\n");
+ goto fail;
+ }
error = vtnet_alloc_virtqueues(sc);
if (error) {
@@ -339,111 +387,21 @@ vtnet_attach(device_t dev)
goto fail;
}
- ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- device_printf(dev, "cannot allocate ifnet structure\n");
- error = ENOSPC;
+ error = vtnet_setup_interface(sc);
+ if (error) {
+ device_printf(dev, "cannot setup interface\n");
goto fail;
}
- ifp->if_softc = sc;
- if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = vtnet_init;
- ifp->if_start = vtnet_start;
- ifp->if_ioctl = vtnet_ioctl;
-
- sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
- sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
-
- tx_size = virtqueue_size(sc->vtnet_tx_vq);
- sc->vtnet_tx_size = tx_size;
- IFQ_SET_MAXLEN(&ifp->if_snd, tx_size - 1);
- ifp->if_snd.ifq_drv_maxlen = tx_size - 1;
- IFQ_SET_READY(&ifp->if_snd);
-
- ether_ifattach(ifp, sc->vtnet_hwaddr);
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
- ifp->if_capabilities |= IFCAP_LINKSTATE;
-
- /* Tell the upper layer(s) we support long frames. */
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
- ifp->if_capabilities |= IFCAP_TXCSUM;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
- ifp->if_capabilities |= IFCAP_TSO4;
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
- ifp->if_capabilities |= IFCAP_TSO6;
- if (ifp->if_capabilities & IFCAP_TSO)
- ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
- sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
- }
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
- ifp->if_capabilities |= IFCAP_RXCSUM;
-
- if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
- virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
- ifp->if_capabilities |= IFCAP_LRO;
- }
-
- if (ifp->if_capabilities & IFCAP_HWCSUM) {
- /*
- * VirtIO does not support VLAN tagging, but we can fake
- * it by inserting and removing the 802.1Q header during
- * transmit and receive. We are then able to do checksum
- * offloading of VLAN frames.
- */
- ifp->if_capabilities |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
- }
-
- ifp->if_capenable = ifp->if_capabilities;
-
- /*
- * Capabilities after here are not enabled by default.
- */
-
- if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
- ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
-
- sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
- vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
- sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
- vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
- }
-
-#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
-#endif
-
error = virtio_setup_intr(dev, INTR_TYPE_NET);
if (error) {
device_printf(dev, "cannot setup virtqueue interrupts\n");
- ether_ifdetach(ifp);
+		/* BMV: This will crash if called during boot! */
+ ether_ifdetach(sc->vtnet_ifp);
goto fail;
}
- /*
- * Device defaults to promiscuous mode for backwards
- * compatibility. Turn it off if possible.
- */
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
- VTNET_LOCK(sc);
- if (vtnet_set_promisc(sc, 0) != 0) {
- ifp->if_flags |= IFF_PROMISC;
- device_printf(dev,
- "cannot disable promiscuous mode\n");
- }
- VTNET_UNLOCK(sc);
- } else
- ifp->if_flags |= IFF_PROMISC;
+ vtnet_start_taskqueues(sc);
fail:
if (error)
@@ -461,24 +419,19 @@ vtnet_detach(device_t dev)
sc = device_get_softc(dev);
ifp = sc->vtnet_ifp;
- KASSERT(mtx_initialized(VTNET_MTX(sc)),
- ("vtnet mutex not initialized"));
-
-#ifdef DEVICE_POLLING
- if (ifp != NULL && ifp->if_capenable & IFCAP_POLLING)
- ether_poll_deregister(ifp);
-#endif
-
if (device_is_attached(dev)) {
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_stop(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
callout_drain(&sc->vtnet_tick_ch);
+ vtnet_drain_taskqueues(sc);
ether_ifdetach(ifp);
}
+ vtnet_free_taskqueues(sc);
+
if (sc->vtnet_vlan_attach != NULL) {
EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
sc->vtnet_vlan_attach = NULL;
@@ -488,25 +441,20 @@ vtnet_detach(device_t dev)
sc->vtnet_vlan_detach = NULL;
}
- if (sc->vtnet_mac_filter != NULL) {
- free(sc->vtnet_mac_filter, M_DEVBUF);
- sc->vtnet_mac_filter = NULL;
- }
+ ifmedia_removeall(&sc->vtnet_media);
if (ifp != NULL) {
if_free(ifp);
sc->vtnet_ifp = NULL;
}
- if (sc->vtnet_rx_vq != NULL)
- vtnet_free_rx_mbufs(sc);
- if (sc->vtnet_tx_vq != NULL)
- vtnet_free_tx_mbufs(sc);
+ vtnet_free_rxtx_queues(sc);
+ vtnet_free_rx_filters(sc);
+
if (sc->vtnet_ctrl_vq != NULL)
vtnet_free_ctrl_vq(sc);
- ifmedia_removeall(&sc->vtnet_media);
- VTNET_LOCK_DESTROY(sc);
+ VTNET_CORE_LOCK_DESTROY(sc);
return (0);
}
@@ -518,10 +466,10 @@ vtnet_suspend(device_t dev)
sc = device_get_softc(dev);
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_stop(sc);
sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -535,11 +483,11 @@ vtnet_resume(device_t dev)
sc = device_get_softc(dev);
ifp = sc->vtnet_ifp;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if (ifp->if_flags & IFF_UP)
vtnet_init_locked(sc);
sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -556,15 +504,26 @@ vtnet_shutdown(device_t dev)
}
static int
+vtnet_attach_completed(device_t dev)
+{
+
+ vtnet_attach_disable_promisc(device_get_softc(dev));
+
+ return (0);
+}
+
+static int
vtnet_config_change(device_t dev)
{
struct vtnet_softc *sc;
sc = device_get_softc(dev);
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_update_link_status(sc);
- VTNET_UNLOCK(sc);
+ if (sc->vtnet_link_active != 0)
+ vtnet_tx_start_all(sc);
+ VTNET_CORE_UNLOCK(sc);
return (0);
}
@@ -578,188 +537,491 @@ vtnet_negotiate_features(struct vtnet_softc *sc)
dev = sc->vtnet_dev;
mask = 0;
- if (vtnet_csum_disable)
+ /*
+ * TSO and LRO are only available when their corresponding checksum
+ * offload feature is also negotiated.
+ */
+ if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
+ mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
+ }
+ if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
+ mask |= VTNET_TSO_FEATURES;
+ if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
+ mask |= VTNET_LRO_FEATURES;
+ if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
+ mask |= VIRTIO_NET_F_MQ;
+#ifdef VTNET_LEGACY_TX
+ mask |= VIRTIO_NET_F_MQ;
+#endif
+
+ features = VTNET_FEATURES & ~mask;
+ sc->vtnet_features = virtio_negotiate_features(dev, features);
+
+ if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0)
+ return;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
+ return;
/*
- * TSO and LRO are only available when their corresponding
- * checksum offload feature is also negotiated.
+ * LRO without mergeable buffers requires special care. This is not
+ * ideal because every receive buffer must be large enough to hold
+	 * the maximum TCP packet, the Ethernet header, and the VirtIO net
+	 * header. This requires up to 34 descriptors with MCLBYTES clusters.
+	 * If we do not have indirect descriptors, LRO is disabled since the
+	 * virtqueue will not contain very many receive buffers.
*/
+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+ device_printf(dev,
+ "LRO disabled due to both mergeable buffers and indirect "
+ "descriptors not negotiated\n");
- if (vtnet_csum_disable || vtnet_tso_disable)
- mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
- VIRTIO_NET_F_HOST_ECN;
+ features &= ~VTNET_LRO_FEATURES;
+ sc->vtnet_features = virtio_negotiate_features(dev, features);
+ } else
+ sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
+}
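
A quick check of the 34-descriptor figure above: a maximum-size TCP/IP
packet is 65535 bytes, and 65535 / 2048 (MCLBYTES) rounds up to 32 clusters;
one more segment for the VirtIO net header plus slack for the Ethernet/VLAN
header brings a chain to roughly 34 descriptors. Without indirect
descriptors, each chain would occupy that many virtqueue slots directly,
leaving room for only a handful of receive buffers, hence the fallback above.
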
- if (vtnet_csum_disable || vtnet_lro_disable)
- mask |= VTNET_LRO_FEATURES;
+static void
+vtnet_setup_features(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int max_pairs, max;
- features = VTNET_FEATURES & ~mask;
-#ifdef VTNET_TX_INTR_MODERATION
- features |= VIRTIO_F_NOTIFY_ON_EMPTY;
-#endif
- sc->vtnet_features = virtio_negotiate_features(dev, features);
+ dev = sc->vtnet_dev;
+
+ vtnet_negotiate_features(sc);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
+ /* This feature should always be negotiated. */
+ sc->vtnet_flags |= VTNET_FLAG_MAC;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
+ sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
+ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ } else
+ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
- if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0 &&
- virtio_with_feature(dev, VTNET_LRO_FEATURES)) {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
+ sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
+ sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
+ sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
+ max_pairs = virtio_read_dev_config_2(dev,
+ offsetof(struct virtio_net_config, max_virtqueue_pairs));
+ if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+ max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
+ max_pairs = 1;
+ } else
+ max_pairs = 1;
+
+ if (max_pairs > 1) {
/*
- * LRO without mergeable buffers requires special care. This
- * is not ideal because every receive buffer must be large
- * enough to hold the maximum TCP packet, the Ethernet header,
- * and the vtnet_rx_header. This requires up to 34 descriptors
- * when using MCLBYTES clusters. If we do not have indirect
- * descriptors, LRO is disabled since the virtqueue will not
- * be able to contain very many receive buffers.
+ * Limit the maximum number of queue pairs to the number of
+ * CPUs or the configured maximum. The actual number of
+ * queues that get used may be less.
*/
- if (virtio_with_feature(dev,
- VIRTIO_RING_F_INDIRECT_DESC) == 0) {
- device_printf(dev,
- "LRO disabled due to lack of both mergeable "
- "buffers and indirect descriptors\n");
-
- sc->vtnet_features = virtio_negotiate_features(dev,
- features & ~VTNET_LRO_FEATURES);
- } else
- sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
+ max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
+ if (max > 0 && max_pairs > max)
+ max_pairs = max;
+ if (max_pairs > mp_ncpus)
+ max_pairs = mp_ncpus;
+ if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
+ max_pairs = VTNET_MAX_QUEUE_PAIRS;
+ if (max_pairs > 1)
+ sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
}
+
+ sc->vtnet_max_vq_pairs = max_pairs;
}
static int
-vtnet_alloc_virtqueues(struct vtnet_softc *sc)
+vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
- device_t dev;
- struct vq_alloc_info vq_info[3];
- int nvqs, rxsegs;
+ struct vtnet_rxq *rxq;
- dev = sc->vtnet_dev;
- nvqs = 2;
+ rxq = &sc->vtnet_rxqs[id];
- /*
- * Indirect descriptors are not needed for the Rx
- * virtqueue when mergeable buffers are negotiated.
- * The header is placed inline with the data, not
- * in a separate descriptor, and mbuf clusters are
- * always physically contiguous.
- */
- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
- rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ?
- VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
- } else
- rxsegs = 0;
+ snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
+ device_get_nameunit(sc->vtnet_dev), id);
+ mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
- VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs,
- vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
- "%s receive", device_get_nameunit(dev));
+ rxq->vtnrx_sc = sc;
+ rxq->vtnrx_id = id;
- VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS,
- vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
- "%s transmit", device_get_nameunit(dev));
+ TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
+ rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
+ taskqueue_thread_enqueue, &rxq->vtnrx_tq);
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
- nvqs++;
+ return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
+}
+
+static int
+vtnet_init_txq(struct vtnet_softc *sc, int id)
+{
+ struct vtnet_txq *txq;
+
+ txq = &sc->vtnet_txqs[id];
+
+ snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
+ device_get_nameunit(sc->vtnet_dev), id);
+ mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
+
+ txq->vtntx_sc = sc;
+ txq->vtntx_id = id;
+
+#ifndef VTNET_LEGACY_TX
+ txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
+ M_NOWAIT, &txq->vtntx_mtx);
+ if (txq->vtntx_br == NULL)
+ return (ENOMEM);
+
+ TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
+#endif
+ TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
+ txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
+ taskqueue_thread_enqueue, &txq->vtntx_tq);
+ if (txq->vtntx_tq == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
- VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
- &sc->vtnet_ctrl_vq, "%s control",
- device_get_nameunit(dev));
+static int
+vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
+{
+ int i, npairs, error;
+
+ npairs = sc->vtnet_max_vq_pairs;
+
+ sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < npairs; i++) {
+ error = vtnet_init_rxq(sc, i);
+ if (error)
+ return (error);
+ error = vtnet_init_txq(sc, i);
+ if (error)
+ return (error);
}
- return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
+ vtnet_setup_queue_sysctl(sc);
+
+ return (0);
}
static void
-vtnet_get_hwaddr(struct vtnet_softc *sc)
+vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{
- device_t dev;
- dev = sc->vtnet_dev;
+ rxq->vtnrx_sc = NULL;
+ rxq->vtnrx_id = -1;
- if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
- virtio_read_device_config(dev,
- offsetof(struct virtio_net_config, mac),
- sc->vtnet_hwaddr, ETHER_ADDR_LEN);
- } else {
- /* Generate random locally administered unicast address. */
- sc->vtnet_hwaddr[0] = 0xB2;
- arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
+ if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
+ mtx_destroy(&rxq->vtnrx_mtx);
+}
- vtnet_set_hwaddr(sc);
+static void
+vtnet_destroy_txq(struct vtnet_txq *txq)
+{
+
+ txq->vtntx_sc = NULL;
+ txq->vtntx_id = -1;
+
+#ifndef VTNET_LEGACY_TX
+ if (txq->vtntx_br != NULL) {
+ buf_ring_free(txq->vtntx_br, M_DEVBUF);
+ txq->vtntx_br = NULL;
}
+#endif
+
+ if (mtx_initialized(&txq->vtntx_mtx) != 0)
+ mtx_destroy(&txq->vtntx_mtx);
}
static void
-vtnet_set_hwaddr(struct vtnet_softc *sc)
+vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
- device_t dev;
+ int i;
- dev = sc->vtnet_dev;
+ if (sc->vtnet_rxqs != NULL) {
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+ vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
+ free(sc->vtnet_rxqs, M_DEVBUF);
+ sc->vtnet_rxqs = NULL;
+ }
- virtio_write_device_config(dev,
- offsetof(struct virtio_net_config, mac),
- sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+ if (sc->vtnet_txqs != NULL) {
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
+ vtnet_destroy_txq(&sc->vtnet_txqs[i]);
+ free(sc->vtnet_txqs, M_DEVBUF);
+ sc->vtnet_txqs = NULL;
+ }
}
static int
-vtnet_is_link_up(struct vtnet_softc *sc)
+vtnet_alloc_rx_filters(struct vtnet_softc *sc)
+{
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
+ sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (sc->vtnet_mac_filter == NULL)
+ return (ENOMEM);
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
+ sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
+ VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (sc->vtnet_vlan_filter == NULL)
+ return (ENOMEM);
+ }
+
+ return (0);
+}
+
+static void
+vtnet_free_rx_filters(struct vtnet_softc *sc)
+{
+
+ if (sc->vtnet_mac_filter != NULL) {
+ free(sc->vtnet_mac_filter, M_DEVBUF);
+ sc->vtnet_mac_filter = NULL;
+ }
+
+ if (sc->vtnet_vlan_filter != NULL) {
+ free(sc->vtnet_vlan_filter, M_DEVBUF);
+ sc->vtnet_vlan_filter = NULL;
+ }
+}
+
+static int
+vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
device_t dev;
- struct ifnet *ifp;
- uint16_t status;
+ struct vq_alloc_info *info;
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i, idx, flags, nvqs, rxsegs, error;
dev = sc->vtnet_dev;
- ifp = sc->vtnet_ifp;
+ flags = 0;
+
+ /*
+ * Indirect descriptors are not needed for the Rx virtqueue when
+ * mergeable buffers are negotiated. The header is placed inline
+ * with the data, not in a separate descriptor, and mbuf clusters
+ * are always physically contiguous.
+ */
+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+ rxsegs = 0;
+ else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+ rxsegs = VTNET_MAX_RX_SEGS;
+ else
+ rxsegs = VTNET_MIN_RX_SEGS;
- VTNET_LOCK_ASSERT(sc);
+ nvqs = sc->vtnet_max_vq_pairs * 2;
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
+ nvqs++;
- if ((ifp->if_capenable & IFCAP_LINKSTATE) == 0)
- return (1);
+	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
+ if (info == NULL)
+ return (ENOMEM);
- status = virtio_read_dev_config_2(dev,
- offsetof(struct virtio_net_config, status));
+	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
+ rxq = &sc->vtnet_rxqs[i];
+ VQ_ALLOC_INFO_INIT(&info[idx], rxsegs,
+ vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
+ "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
- return ((status & VIRTIO_NET_S_LINK_UP) != 0);
+ txq = &sc->vtnet_txqs[i];
+ VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS,
+ vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
+ "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
+ VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
+ &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
+ }
+
+ /*
+ * Enable interrupt binding if this is multiqueue. This only matters
+ * when per-vq MSIX is available.
+ */
+ if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
+ flags |= 0;
+
+ error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
+ free(info, M_TEMP);
+
+ return (error);
}
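
The loop above fills vq_info[] in the order the VirtIO specification assigns
virtqueue indices: receive queue N at index 2N, its transmit partner at
2N + 1, and the control queue (when negotiated) last. For illustration only:

	rx_vq_index   = 2 * pair;
	tx_vq_index   = 2 * pair + 1;
	ctrl_vq_index = 2 * sc->vtnet_max_vq_pairs;   /* with VTNET_FLAG_CTRL_VQ */
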
-static void
-vtnet_update_link_status(struct vtnet_softc *sc)
+static int
+vtnet_setup_interface(struct vtnet_softc *sc)
{
+ device_t dev;
struct ifnet *ifp;
- int link;
+ int limit;
- ifp = sc->vtnet_ifp;
+ dev = sc->vtnet_dev;
- link = vtnet_is_link_up(sc);
+ ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ device_printf(dev, "cannot allocate ifnet structure\n");
+ return (ENOSPC);
+ }
- if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
- sc->vtnet_flags |= VTNET_FLAG_LINK;
- if_link_state_change(ifp, LINK_STATE_UP);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
- } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
- sc->vtnet_flags &= ~VTNET_FLAG_LINK;
- if_link_state_change(ifp, LINK_STATE_DOWN);
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ if_initbaudrate(ifp, IF_Gbps(10)); /* Approx. */
+ ifp->if_softc = sc;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = vtnet_init;
+ ifp->if_ioctl = vtnet_ioctl;
+
+#ifndef VTNET_LEGACY_TX
+ ifp->if_transmit = vtnet_txq_mq_start;
+ ifp->if_qflush = vtnet_qflush;
+#else
+ struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
+ ifp->if_start = vtnet_start;
+ IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
+ ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
+ IFQ_SET_READY(&ifp->if_snd);
+#endif
+
+ ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
+ vtnet_ifmedia_sts);
+ ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
+ ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
+
+ /* Read (or generate) the MAC address for the adapter. */
+ vtnet_get_hwaddr(sc);
+
+ ether_ifattach(ifp, sc->vtnet_hwaddr);
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+
+ /* Tell the upper layer(s) we support long frames. */
+ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
+ ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
+ ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
+ sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
+ } else {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
+ ifp->if_capabilities |= IFCAP_TSO4;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
+ ifp->if_capabilities |= IFCAP_TSO6;
+ if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
+ sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
+ }
+
+ if (ifp->if_capabilities & IFCAP_TSO)
+ ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
+ }
+
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
+ ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+
+ if (ifp->if_capabilities & IFCAP_HWCSUM) {
+ /*
+ * VirtIO does not support VLAN tagging, but we can fake
+ * it by inserting and removing the 802.1Q header during
+ * transmit and receive. We are then able to do checksum
+ * offloading of VLAN frames.
+ */
+ ifp->if_capabilities |=
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
+ }
+
+ ifp->if_capenable = ifp->if_capabilities;
+
+ /*
+ * Capabilities after here are not enabled by default.
+ */
+
+ if (ifp->if_capabilities & IFCAP_RXCSUM) {
+ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
+ ifp->if_capabilities |= IFCAP_LRO;
+ }
+
+ if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
+ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
+
+ sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
+ sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
}
+
+ limit = vtnet_tunable_int(sc, "rx_process_limit",
+ vtnet_rx_process_limit);
+ if (limit < 0)
+ limit = INT_MAX;
+ sc->vtnet_rx_process_limit = limit;
+
+ return (0);
}
-static void
-vtnet_watchdog(struct vtnet_softc *sc)
+static int
+vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
{
struct ifnet *ifp;
+ int frame_size, clsize;
ifp = sc->vtnet_ifp;
-#ifdef VTNET_TX_INTR_MODERATION
- vtnet_txeof(sc);
-#endif
+ if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
+ return (EINVAL);
- if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
- return;
+ frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
+ new_mtu;
- if_printf(ifp, "watchdog timeout -- resetting\n");
-#ifdef VTNET_DEBUG
- virtqueue_dump(sc->vtnet_tx_vq);
-#endif
- ifp->if_oerrors++;
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- vtnet_init_locked(sc);
+ /*
+ * Based on the new MTU (and hence frame size) determine which
+ * cluster size is most appropriate for the receive queues.
+ */
+ if (frame_size <= MCLBYTES) {
+ clsize = MCLBYTES;
+ } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
+ /* Avoid going past 9K jumbos. */
+ if (frame_size > MJUM9BYTES)
+ return (EINVAL);
+ clsize = MJUM9BYTES;
+ } else
+ clsize = MJUMPAGESIZE;
+
+ ifp->if_mtu = new_mtu;
+ sc->vtnet_rx_new_clsize = clsize;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ vtnet_init_locked(sc);
+ }
+
+ return (0);
}
static int
@@ -771,22 +1033,19 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sc = ifp->if_softc;
ifr = (struct ifreq *) data;
- reinit = 0;
error = 0;
switch (cmd) {
case SIOCSIFMTU:
- if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
- error = EINVAL;
- else if (ifp->if_mtu != ifr->ifr_mtu) {
- VTNET_LOCK(sc);
+ if (ifp->if_mtu != ifr->ifr_mtu) {
+ VTNET_CORE_LOCK(sc);
error = vtnet_change_mtu(sc, ifr->ifr_mtu);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
break;
case SIOCSIFFLAGS:
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if ((ifp->if_flags & IFF_UP) == 0) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
vtnet_stop(sc);
@@ -803,16 +1062,17 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (error == 0)
sc->vtnet_if_flags = ifp->if_flags;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
- VTNET_LOCK(sc);
- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
+ break;
+ VTNET_CORE_LOCK(sc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
vtnet_rx_filter_mac(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
break;
case SIOCSIFMEDIA:
@@ -821,68 +1081,36 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
case SIOCSIFCAP:
+ VTNET_CORE_LOCK(sc);
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
-#ifdef DEVICE_POLLING
- if (mask & IFCAP_POLLING) {
- if (ifr->ifr_reqcap & IFCAP_POLLING) {
- error = ether_poll_register(vtnet_poll, ifp);
- if (error)
- break;
-
- VTNET_LOCK(sc);
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
- ifp->if_capenable |= IFCAP_POLLING;
- VTNET_UNLOCK(sc);
- } else {
- error = ether_poll_deregister(ifp);
-
- /* Enable interrupts even in error case. */
- VTNET_LOCK(sc);
- vtnet_enable_tx_intr(sc);
- vtnet_enable_rx_intr(sc);
- ifp->if_capenable &= ~IFCAP_POLLING;
- VTNET_UNLOCK(sc);
- }
- }
-#endif
- VTNET_LOCK(sc);
-
- if (mask & IFCAP_TXCSUM) {
+ if (mask & IFCAP_TXCSUM)
ifp->if_capenable ^= IFCAP_TXCSUM;
- if (ifp->if_capenable & IFCAP_TXCSUM)
- ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
- else
- ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
- }
-
- if (mask & IFCAP_TSO4) {
+ if (mask & IFCAP_TXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ if (mask & IFCAP_TSO4)
ifp->if_capenable ^= IFCAP_TSO4;
- if (ifp->if_capenable & IFCAP_TSO4)
- ifp->if_hwassist |= CSUM_TSO;
- else
- ifp->if_hwassist &= ~CSUM_TSO;
- }
-
- if (mask & IFCAP_RXCSUM) {
- ifp->if_capenable ^= IFCAP_RXCSUM;
- reinit = 1;
- }
+ if (mask & IFCAP_TSO6)
+ ifp->if_capenable ^= IFCAP_TSO6;
- if (mask & IFCAP_LRO) {
- ifp->if_capenable ^= IFCAP_LRO;
+ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
+ IFCAP_VLAN_HWFILTER)) {
+ /* These Rx features require us to renegotiate. */
reinit = 1;
- }
- if (mask & IFCAP_VLAN_HWFILTER) {
- ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
- reinit = 1;
- }
+ if (mask & IFCAP_RXCSUM)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ if (mask & IFCAP_RXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;
+ if (mask & IFCAP_VLAN_HWFILTER)
+ ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ } else
+ reinit = 0;
if (mask & IFCAP_VLAN_HWTSO)
ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
-
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
@@ -890,9 +1118,10 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
vtnet_init_locked(sc);
}
+
+ VTNET_CORE_UNLOCK(sc);
VLAN_CAPABILITIES(ifp);
- VTNET_UNLOCK(sc);
break;
default:
@@ -900,80 +1129,32 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- VTNET_LOCK_ASSERT_NOTOWNED(sc);
+ VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
return (error);
}
static int
-vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
-{
- struct ifnet *ifp;
- int new_frame_size, clsize;
-
- ifp = sc->vtnet_ifp;
-
- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
- new_frame_size = sizeof(struct vtnet_rx_header) +
- sizeof(struct ether_vlan_header) + new_mtu;
-
- if (new_frame_size > MJUM9BYTES)
- return (EINVAL);
-
- if (new_frame_size <= MCLBYTES)
- clsize = MCLBYTES;
- else
- clsize = MJUM9BYTES;
- } else {
- new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
- sizeof(struct ether_vlan_header) + new_mtu;
-
- if (new_frame_size <= MCLBYTES)
- clsize = MCLBYTES;
- else
- clsize = MJUMPAGESIZE;
- }
-
- sc->vtnet_rx_mbuf_size = clsize;
- sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
- KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
- ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
-
- ifp->if_mtu = new_mtu;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- vtnet_init_locked(sc);
- }
-
- return (0);
-}
-
-static int
-vtnet_init_rx_vq(struct vtnet_softc *sc)
+vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
struct virtqueue *vq;
int nbufs, error;
- vq = sc->vtnet_rx_vq;
- nbufs = 0;
+ vq = rxq->vtnrx_vq;
error = ENOSPC;
- while (!virtqueue_full(vq)) {
- if ((error = vtnet_newbuf(sc)) != 0)
+ for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
+ error = vtnet_rxq_new_buf(rxq);
+ if (error)
break;
- nbufs++;
}
if (nbufs > 0) {
virtqueue_notify(vq);
-
/*
* EMSGSIZE signifies the virtqueue did not have enough
* entries available to hold the last mbuf. This is not
- * an error. We should not get ENOSPC since we check if
- * the virtqueue is full before attempting to add a
- * buffer.
+ * an error.
*/
if (error == EMSGSIZE)
error = 0;
@@ -983,86 +1164,32 @@ vtnet_init_rx_vq(struct vtnet_softc *sc)
}
static void
-vtnet_free_rx_mbufs(struct vtnet_softc *sc)
+vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
struct virtqueue *vq;
struct mbuf *m;
int last;
- vq = sc->vtnet_rx_vq;
+ vq = rxq->vtnrx_vq;
last = 0;
while ((m = virtqueue_drain(vq, &last)) != NULL)
m_freem(m);
- KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
-}
-
-static void
-vtnet_free_tx_mbufs(struct vtnet_softc *sc)
-{
- struct virtqueue *vq;
- struct vtnet_tx_header *txhdr;
- int last;
-
- vq = sc->vtnet_tx_vq;
- last = 0;
-
- while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
- m_freem(txhdr->vth_mbuf);
- uma_zfree(vtnet_tx_header_zone, txhdr);
- }
-
- KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
-}
-
-static void
-vtnet_free_ctrl_vq(struct vtnet_softc *sc)
-{
-
- /*
- * The control virtqueue is only polled, therefore
- * it should already be empty.
- */
- KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
- ("Ctrl Vq not empty"));
-}
-
-#ifdef DEVICE_POLLING
-static int
-vtnet_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
-{
- struct vtnet_softc *sc;
- int rx_done;
-
- sc = ifp->if_softc;
- rx_done = 0;
-
- VTNET_LOCK(sc);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- if (cmd == POLL_AND_CHECK_STATUS)
- vtnet_update_link_status(sc);
-
- if (virtqueue_nused(sc->vtnet_rx_vq) > 0)
- vtnet_rxeof(sc, count, &rx_done);
-
- vtnet_txeof(sc);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
- }
- VTNET_UNLOCK(sc);
-
- return (rx_done);
+ KASSERT(virtqueue_empty(vq),
+ ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}
-#endif /* DEVICE_POLLING */
static struct mbuf *
-vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
+vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
struct mbuf *m_head, *m_tail, *m;
int i, clsize;
- clsize = sc->vtnet_rx_mbuf_size;
+ clsize = sc->vtnet_rx_clsize;
+
+ KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
+ ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
if (m_head == NULL)
@@ -1071,19 +1198,15 @@ vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
m_head->m_len = clsize;
m_tail = m_head;
- if (nbufs > 1) {
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
- ("chained Rx mbuf requested without LRO_NOMRG"));
-
- for (i = 1; i < nbufs; i++) {
- m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
- if (m == NULL)
- goto fail;
+ /* Allocate the rest of the chain. */
+ for (i = 1; i < nbufs; i++) {
+ m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
+ if (m == NULL)
+ goto fail;
- m->m_len = clsize;
- m_tail->m_next = m;
- m_tail = m;
- }
+ m->m_len = clsize;
+ m_tail->m_next = m;
+ m_tail = m;
}
if (m_tailp != NULL)
@@ -1098,43 +1221,48 @@ fail:
return (NULL);
}
+/*
+ * Slow path for when LRO without mergeable buffers is negotiated.
+ */
static int
-vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
+vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
+ int len0)
{
+ struct vtnet_softc *sc;
struct mbuf *m, *m_prev;
struct mbuf *m_new, *m_tail;
int len, clsize, nreplace, error;
- m = m0;
- m_prev = NULL;
- len = len0;
+ sc = rxq->vtnrx_sc;
+ clsize = sc->vtnet_rx_clsize;
+ m_prev = NULL;
m_tail = NULL;
- clsize = sc->vtnet_rx_mbuf_size;
nreplace = 0;
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ||
- m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG"));
+ m = m0;
+ len = len0;
/*
- * Since LRO_NOMRG mbuf chains are so large, we want to avoid
- * allocating an entire chain for each received frame. When
- * the received frame's length is less than that of the chain,
- * the unused mbufs are reassigned to the new chain.
+ * Since these mbuf chains are so large, we avoid allocating an
+ * entire replacement chain if possible. When the received frame
+ * did not consume the entire chain, the unused mbufs are moved
+ * to the replacement chain.
*/
while (len > 0) {
/*
- * Something is seriously wrong if we received
- * a frame larger than the mbuf chain. Drop it.
+ * Something is seriously wrong if we received a frame
+ * larger than the chain. Drop it.
*/
if (m == NULL) {
sc->vtnet_stats.rx_frame_too_large++;
return (EMSGSIZE);
}
+ /* We always allocate the same cluster size. */
KASSERT(m->m_len == clsize,
- ("mbuf length not expected cluster size: %d",
- m->m_len));
+ ("%s: mbuf size %d is not the cluster size %d",
+ __func__, m->m_len, clsize));
m->m_len = MIN(m->m_len, len);
len -= m->m_len;
@@ -1144,27 +1272,26 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
nreplace++;
}
- KASSERT(m_prev != NULL, ("m_prev == NULL"));
- KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
- ("too many replacement mbufs: %d/%d", nreplace,
- sc->vtnet_rx_mbuf_count));
+ KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
+ ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
+ sc->vtnet_rx_nmbufs));
- m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
+ m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
if (m_new == NULL) {
m_prev->m_len = clsize;
return (ENOBUFS);
}
/*
- * Move unused mbufs, if any, from the original chain
- * onto the end of the new chain.
+ * Move any unused mbufs from the received chain onto the end
+ * of the new chain.
*/
if (m_prev->m_next != NULL) {
m_tail->m_next = m_prev->m_next;
m_prev->m_next = NULL;
}
- error = vtnet_enqueue_rxbuf(sc, m_new);
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
if (error) {
/*
* BAD! We could not enqueue the replacement mbuf chain. We
@@ -1189,343 +1316,321 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
}
static int
-vtnet_newbuf(struct vtnet_softc *sc)
+vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
- struct mbuf *m;
+ struct vtnet_softc *sc;
+ struct mbuf *m_new;
int error;
- m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
- if (m == NULL)
- return (ENOBUFS);
+ sc = rxq->vtnrx_sc;
- error = vtnet_enqueue_rxbuf(sc, m);
- if (error)
- m_freem(m);
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
+ ("%s: chained mbuf without LRO_NOMRG", __func__));
- return (error);
-}
-
-static void
-vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
-{
- struct virtqueue *vq;
- struct mbuf *m;
-
- vq = sc->vtnet_rx_vq;
+ if (m->m_next == NULL) {
+ /* Fast-path for the common case of just one mbuf. */
+ if (m->m_len < len)
+ return (EINVAL);
- while (--nbufs > 0) {
- if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
- break;
- vtnet_discard_rxbuf(sc, m);
- }
-}
+ m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+ if (m_new == NULL)
+ return (ENOBUFS);
-static void
-vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
-{
- int error;
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
+ if (error) {
+ /*
+			 * The new mbuf is supposed to be an identical
+			 * copy of the one just dequeued, so this is an
+ * unexpected error.
+ */
+ m_freem(m_new);
+ sc->vtnet_stats.rx_enq_replacement_failed++;
+ } else
+ m->m_len = len;
+ } else
+ error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
- /*
- * Requeue the discarded mbuf. This should always be
- * successful since it was just dequeued.
- */
- error = vtnet_enqueue_rxbuf(sc, m);
- KASSERT(error == 0, ("cannot requeue discarded mbuf"));
+ return (error);
}
static int
-vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
+vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
struct sglist sg;
struct sglist_seg segs[VTNET_MAX_RX_SEGS];
+ struct vtnet_softc *sc;
struct vtnet_rx_header *rxhdr;
- struct virtio_net_hdr *hdr;
uint8_t *mdata;
int offset, error;
- VTNET_LOCK_ASSERT(sc);
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ||
- m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG"));
-
- sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
-
+ sc = rxq->vtnrx_sc;
mdata = mtod(m, uint8_t *);
- offset = 0;
+ VTNET_RXQ_LOCK_ASSERT(rxq);
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
+ ("%s: chained mbuf without LRO_NOMRG", __func__));
+ KASSERT(m->m_len == sc->vtnet_rx_clsize,
+ ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
+ sc->vtnet_rx_clsize));
+
+ sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
+ MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
rxhdr = (struct vtnet_rx_header *) mdata;
- hdr = &rxhdr->vrh_hdr;
- offset += sizeof(struct vtnet_rx_header);
-
- error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
- KASSERT(error == 0, ("cannot add header to sglist"));
- }
-
- error = sglist_append(&sg, mdata + offset, m->m_len - offset);
- if (error)
- return (error);
+ sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
+ offset = sizeof(struct vtnet_rx_header);
+ } else
+ offset = 0;
+ sglist_append(&sg, mdata + offset, m->m_len - offset);
if (m->m_next != NULL) {
error = sglist_append_mbuf(&sg, m->m_next);
- if (error)
- return (error);
+ MPASS(error == 0);
}
- return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
+ error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg);
+
+ return (error);
}
-static void
-vtnet_vlan_tag_remove(struct mbuf *m)
+static int
+vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
- struct ether_vlan_header *evl;
+ struct vtnet_softc *sc;
+ struct mbuf *m;
+ int error;
- evl = mtod(m, struct ether_vlan_header *);
+ sc = rxq->vtnrx_sc;
- m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
- m->m_flags |= M_VLANTAG;
+ m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
+ if (m == NULL)
+ return (ENOBUFS);
- /* Strip the 802.1Q header. */
- bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
- ETHER_HDR_LEN - ETHER_TYPE_LEN);
- m_adj(m, ETHER_VLAN_ENCAP_LEN);
+ error = vtnet_rxq_enqueue_buf(rxq, m);
+ if (error)
+ m_freem(m);
+
+ return (error);
}
-#ifdef notyet
+/*
+ * Use the checksum offset in the VirtIO header to set the
+ * correct CSUM_* flags.
+ */
static int
-vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
- struct ether_header *eh;
- struct ether_vlan_header *evh;
- struct ip *ip;
- struct ip6_hdr *ip6;
- struct udphdr *udp;
- int ip_offset, csum_start, csum_offset, hlen;
- uint16_t eth_type;
- uint8_t ip_proto;
-
- /*
- * Convert the VirtIO checksum interface to FreeBSD's interface.
- * The host only provides us with the offset at which to start
- * checksumming, and the offset from that to place the completed
- * checksum. While this maps well with how Linux does checksums,
- * for FreeBSD, we must parse the received packet in order to set
- * the appropriate CSUM_* flags.
- */
-
- /*
- * Every mbuf added to the receive virtqueue is always at least
- * MCLBYTES big, so assume something is amiss if the first mbuf
- * does not contain both the Ethernet and protocol headers.
- */
- ip_offset = sizeof(struct ether_header);
- if (m->m_len < ip_offset)
- return (1);
+ struct vtnet_softc *sc;
+#if defined(INET) || defined(INET6)
+ int offset = hdr->csum_start + hdr->csum_offset;
+#endif
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- ip_offset = sizeof(struct ether_vlan_header);
- if (m->m_len < ip_offset)
- return (1);
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
- }
+ sc = rxq->vtnrx_sc;
+ /* Only do a basic sanity check on the offset. */
switch (eth_type) {
+#if defined(INET)
case ETHERTYPE_IP:
- if (m->m_len < ip_offset + sizeof(struct ip))
- return (1);
-
- ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
- /* Sanity check the IP header. */
- if (ip->ip_v != IPVERSION)
- return (1);
- hlen = ip->ip_hl << 2;
- if (hlen < sizeof(struct ip))
- return (1);
- if (ntohs(ip->ip_len) < hlen)
+ if (__predict_false(offset < ip_start + sizeof(struct ip)))
return (1);
- if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset))
- return (1);
-
- ip_proto = ip->ip_p;
- csum_start = ip_offset + hlen;
break;
-
+#endif
+#if defined(INET6)
case ETHERTYPE_IPV6:
- if (m->m_len < ip_offset + sizeof(struct ip6_hdr))
+ if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
return (1);
-
- /*
- * XXX FreeBSD does not handle any IPv6 checksum offloading
- * at the moment.
- */
-
- ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
- /* XXX Assume no extension headers are present. */
- ip_proto = ip6->ip6_nxt;
- csum_start = ip_offset + sizeof(struct ip6_hdr);
break;
-
+#endif
default:
sc->vtnet_stats.rx_csum_bad_ethtype++;
return (1);
}
- /* Assume checksum begins right after the IP header. */
- if (hdr->csum_start != csum_start) {
- sc->vtnet_stats.rx_csum_bad_start++;
- return (1);
- }
-
- switch (ip_proto) {
- case IPPROTO_TCP:
- csum_offset = offsetof(struct tcphdr, th_sum);
- break;
-
- case IPPROTO_UDP:
- csum_offset = offsetof(struct udphdr, uh_sum);
+ /*
+ * Use the offset to determine the appropriate CSUM_* flags. This is
+ * a bit dirty, but we can get by with it since the checksum offsets
+ * happen to be different. We assume the host does not do IPv4
+ * header checksum offloading.
+ */
+ switch (hdr->csum_offset) {
+ case offsetof(struct udphdr, uh_sum):
+ case offsetof(struct tcphdr, th_sum):
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
break;
-
- case IPPROTO_SCTP:
- csum_offset = offsetof(struct sctphdr, checksum);
+ case offsetof(struct sctphdr, checksum):
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
break;
-
default:
- sc->vtnet_stats.rx_csum_bad_ipproto++;
- return (1);
- }
-
- if (hdr->csum_offset != csum_offset) {
sc->vtnet_stats.rx_csum_bad_offset++;
return (1);
}
- /*
- * The IP header checksum is almost certainly valid but I'm
- * uncertain if that is guaranteed.
- *
- * m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
- */
+ return (0);
+}
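
The switch above is only sound because UDP, TCP, and SCTP keep their checksum fields at pairwise distinct offsets within their headers. A compile-time restatement of that assumption (not part of the patch; CTASSERT is the stock FreeBSD static assert):

	/* The by-offset dispatch relies on these three values differing. */
	CTASSERT(offsetof(struct udphdr, uh_sum) == 6);
	CTASSERT(offsetof(struct tcphdr, th_sum) == 16);
	CTASSERT(offsetof(struct sctphdr, checksum) == 8);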
- switch (ip_proto) {
- case IPPROTO_UDP:
- if (m->m_len < csum_start + sizeof(struct udphdr))
- return (1);
+static int
+vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int offset, proto;
- udp = (struct udphdr *)(mtod(m, uint8_t *) + csum_start);
- if (udp->uh_sum == 0)
- return (0);
+ sc = rxq->vtnrx_sc;
- /* FALLTHROUGH */
+ switch (eth_type) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip;
+ if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
+ return (1);
+ ip = (struct ip *)(m->m_data + ip_start);
+ proto = ip->ip_p;
+ offset = ip_start + (ip->ip_hl << 2);
+ break;
+ }
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ if (__predict_false(m->m_len < ip_start +
+ sizeof(struct ip6_hdr)))
+ return (1);
+ offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
+ if (__predict_false(offset < 0))
+ return (1);
+ break;
+#endif
+ default:
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
+ return (1);
+ }
+ switch (proto) {
case IPPROTO_TCP:
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
+ return (1);
+ m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xFFFF;
+ break;
+ case IPPROTO_UDP:
+ if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
+ return (1);
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xFFFF;
break;
-
case IPPROTO_SCTP:
+ if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
+ return (1);
m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
break;
+ default:
+ /*
+ * For the remaining protocols, FreeBSD does not support
+ * checksum offloading, so the checksum will be recomputed.
+ */
+#if 0
+ if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
+ "protocol eth_type=%#x proto=%d csum_start=%d "
+ "csum_offset=%d\n", __func__, eth_type, proto,
+ hdr->csum_start, hdr->csum_offset);
+#endif
+ break;
}
- sc->vtnet_stats.rx_csum_offloaded++;
-
return (0);
}
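
On the parse path, ip6_lasthdr() walks the IPv6 extension header chain to locate the transport header. A worked example with a hypothetical frame layout:

	/*
	 * Ethernet (14) + IPv6 (40) + hop-by-hop option (8) + TCP:
	 *
	 *   offset = ip6_lasthdr(m, 14, IPPROTO_IPV6, &proto);
	 *     -> offset = 14 + 40 + 8 = 62 (start of the TCP header)
	 *     -> proto  = IPPROTO_TCP
	 */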
-#endif
/*
- * Alternative method of doing receive checksum offloading. Rather
- * than parsing the received frame down to the IP header, use the
- * csum_offset to determine which CSUM_* flags are appropriate. We
- * can get by with doing this only because the checksum offsets are
- * unique for the things we care about.
+ * Set the appropriate CSUM_* flags. Unfortunately, the information
+ * provided is not directly useful to us. The VirtIO header gives the
+ * offset of the checksum, which is all Linux needs, but this is not
+ * how FreeBSD does things. We are forced to peek inside the packet
+ * a bit.
+ *
+ * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
+ * could accept the offsets and let the stack figure it out.
*/
static int
-vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
struct virtio_net_hdr *hdr)
{
struct ether_header *eh;
struct ether_vlan_header *evh;
- struct udphdr *udp;
- int csum_len;
uint16_t eth_type;
-
- csum_len = hdr->csum_start + hdr->csum_offset;
-
- if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
- return (1);
- if (m->m_len < csum_len)
- return (1);
+ int offset, error;
eh = mtod(m, struct ether_header *);
eth_type = ntohs(eh->ether_type);
if (eth_type == ETHERTYPE_VLAN) {
+ /* BMV: We should handle nested VLAN tags too. */
evh = mtod(m, struct ether_vlan_header *);
eth_type = ntohs(evh->evl_proto);
- }
-
- if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
- sc->vtnet_stats.rx_csum_bad_ethtype++;
- return (1);
- }
-
- /* Use the offset to determine the appropriate CSUM_* flags. */
- switch (hdr->csum_offset) {
- case offsetof(struct udphdr, uh_sum):
- if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
- return (1);
- udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
- if (udp->uh_sum == 0)
- return (0);
+ offset = sizeof(struct ether_vlan_header);
+ } else
+ offset = sizeof(struct ether_header);
- /* FALLTHROUGH */
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
+ else
+ error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
- case offsetof(struct tcphdr, th_sum):
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
- break;
+ return (error);
+}
- case offsetof(struct sctphdr, checksum):
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
- break;
+static void
+vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
+{
+ struct mbuf *m;
- default:
- sc->vtnet_stats.rx_csum_bad_offset++;
- return (1);
+ while (--nbufs > 0) {
+ m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
+ if (m == NULL)
+ break;
+ vtnet_rxq_discard_buf(rxq, m);
}
+}
- sc->vtnet_stats.rx_csum_offloaded++;
+static void
+vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
+{
+ int error;
- return (0);
+ /*
+ * Requeue the discarded mbuf. This should always be successful
+ * since it was just dequeued.
+ */
+ error = vtnet_rxq_enqueue_buf(rxq, m);
+ KASSERT(error == 0,
+ ("%s: cannot requeue discarded mbuf %d", __func__, error));
}
static int
-vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
+vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
+ struct vtnet_softc *sc;
struct ifnet *ifp;
struct virtqueue *vq;
struct mbuf *m, *m_tail;
int len;
+ sc = rxq->vtnrx_sc;
+ vq = rxq->vtnrx_vq;
ifp = sc->vtnet_ifp;
- vq = sc->vtnet_rx_vq;
m_tail = m_head;
while (--nbufs > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL) {
- ifp->if_ierrors++;
+ rxq->vtnrx_stats.vrxs_ierrors++;
goto fail;
}
- if (vtnet_newbuf(sc) != 0) {
- ifp->if_iqdrops++;
- vtnet_discard_rxbuf(sc, m);
+ if (vtnet_rxq_new_buf(rxq) != 0) {
+ rxq->vtnrx_stats.vrxs_iqdrops++;
+ vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
- vtnet_discard_merged_rxbuf(sc, nbufs);
+ vtnet_rxq_discard_merged_bufs(rxq, nbufs);
goto fail;
}
@@ -1549,35 +1654,83 @@ fail:
return (1);
}
+static void
+vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ struct ifnet *ifp;
+ struct ether_header *eh;
+
+ sc = rxq->vtnrx_sc;
+ ifp = sc->vtnet_ifp;
+
+ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
+ eh = mtod(m, struct ether_header *);
+ if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
+ vtnet_vlan_tag_remove(m);
+ /*
+ * With the 802.1Q header removed, update the
+ * checksum starting location accordingly.
+ */
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
+ }
+ }
+
+ m->m_pkthdr.flowid = rxq->vtnrx_id;
+ m->m_flags |= M_FLOWID;
+
+ /*
+ * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
+ * distinction that Linux does. Need to reevaluate whether performing
+ * offloading for the NEEDS_CSUM case is really appropriate.
+ */
+ if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
+ VIRTIO_NET_HDR_F_DATA_VALID)) {
+ if (vtnet_rxq_csum(rxq, m, hdr) == 0)
+ rxq->vtnrx_stats.vrxs_csum++;
+ else
+ rxq->vtnrx_stats.vrxs_csum_failed++;
+ }
+
+ rxq->vtnrx_stats.vrxs_ipackets++;
+ rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
+
+ /* VTNET_RXQ_UNLOCK(rxq); */
+ (*ifp->if_input)(ifp, m);
+ /* VTNET_RXQ_LOCK(rxq); */
+}
+
static int
-vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
+vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
- struct virtio_net_hdr lhdr;
+ struct virtio_net_hdr lhdr, *hdr;
+ struct vtnet_softc *sc;
struct ifnet *ifp;
struct virtqueue *vq;
struct mbuf *m;
- struct ether_header *eh;
- struct virtio_net_hdr *hdr;
struct virtio_net_hdr_mrg_rxbuf *mhdr;
- int len, deq, nbufs, adjsz, rx_npkts;
+ int len, deq, nbufs, adjsz, count;
+ sc = rxq->vtnrx_sc;
+ vq = rxq->vtnrx_vq;
ifp = sc->vtnet_ifp;
- vq = sc->vtnet_rx_vq;
hdr = &lhdr;
deq = 0;
- rx_npkts = 0;
+ count = sc->vtnet_rx_process_limit;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_RXQ_LOCK_ASSERT(rxq);
- while (--count >= 0) {
+ while (count-- > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL)
break;
deq++;
if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
- ifp->if_ierrors++;
- vtnet_discard_rxbuf(sc, m);
+ rxq->vtnrx_stats.vrxs_ierrors++;
+ vtnet_rxq_discard_buf(rxq, m);
continue;
}
@@ -1585,8 +1738,8 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
nbufs = 1;
adjsz = sizeof(struct vtnet_rx_header);
/*
- * Account for our pad between the header and
- * the actual start of the frame.
+ * Account for our pad inserted between the header
+ * and the actual start of the frame.
*/
len += VTNET_RX_HEADER_PAD;
} else {
@@ -1595,11 +1748,11 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
}
- if (vtnet_replace_rxbuf(sc, m, len) != 0) {
- ifp->if_iqdrops++;
- vtnet_discard_rxbuf(sc, m);
+ if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
+ rxq->vtnrx_stats.vrxs_iqdrops++;
+ vtnet_rxq_discard_buf(rxq, m);
if (nbufs > 1)
- vtnet_discard_merged_rxbuf(sc, nbufs);
+ vtnet_rxq_discard_merged_bufs(rxq, nbufs);
continue;
}
@@ -1608,263 +1761,297 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
m->m_pkthdr.csum_flags = 0;
if (nbufs > 1) {
- if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
+ /* Dequeue the rest of the chain. */
+ if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
continue;
}
- ifp->if_ipackets++;
-
/*
* Save copy of header before we strip it. For both mergeable
- * and non-mergeable, the VirtIO header is placed first in the
- * mbuf's data. We no longer need num_buffers, so always use a
- * virtio_net_hdr.
+ * and non-mergeable, the header is at the beginning of the
+ * mbuf data. We no longer need num_buffers, so always use a
+ * regular header.
+ *
+ * BMV: Is this memcpy() expensive? We know the mbuf data is
+ * still valid even after the m_adj().
*/
memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
m_adj(m, adjsz);
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- eh = mtod(m, struct ether_header *);
- if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
- vtnet_vlan_tag_remove(m);
-
- /*
- * With the 802.1Q header removed, update the
- * checksum starting location accordingly.
- */
- if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- hdr->csum_start -=
- ETHER_VLAN_ENCAP_LEN;
- }
- }
-
- if (ifp->if_capenable & IFCAP_RXCSUM &&
- hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- if (vtnet_rx_csum(sc, m, hdr) != 0)
- sc->vtnet_stats.rx_csum_failed++;
- }
-
- VTNET_UNLOCK(sc);
- rx_npkts++;
- (*ifp->if_input)(ifp, m);
- VTNET_LOCK(sc);
-
- /*
- * The interface may have been stopped while we were
- * passing the packet up the network stack.
- */
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
- break;
+ vtnet_rxq_input(rxq, m, hdr);
}
if (deq > 0)
virtqueue_notify(vq);
- if (rx_npktsp != NULL)
- *rx_npktsp = rx_npkts;
-
return (count > 0 ? 0 : EAGAIN);
}
static void
-vtnet_rx_vq_intr(void *xsc)
+vtnet_rx_vq_intr(void *xrxq)
{
struct vtnet_softc *sc;
+ struct vtnet_rxq *rxq;
struct ifnet *ifp;
- int more;
+ int tries, more;
- sc = xsc;
+ rxq = xrxq;
+ sc = rxq->vtnrx_sc;
ifp = sc->vtnet_ifp;
+ tries = 0;
+
+ if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
+ /*
+ * Ignore this interrupt. Either this is a spurious interrupt
+ * or multiqueue without per-VQ MSIX, so every queue needs to
+ * be polled (a brain-dead configuration we could try harder
+ * to avoid).
+ */
+ vtnet_rxq_disable_intr(rxq);
+ return;
+ }
again:
- VTNET_LOCK(sc);
+ VTNET_RXQ_LOCK(rxq);
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- VTNET_UNLOCK(sc);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VTNET_RXQ_UNLOCK(rxq);
return;
}
-#endif
+
+ more = vtnet_rxq_eof(rxq);
+ if (more || vtnet_rxq_enable_intr(rxq) != 0) {
+ if (!more)
+ vtnet_rxq_disable_intr(rxq);
+ /*
+ * This is an occasional condition or race (when !more),
+ * so retry a few times before scheduling the taskqueue.
+ */
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ VTNET_RXQ_UNLOCK(rxq);
+ if (tries++ < VTNET_INTR_DISABLE_RETRIES)
+ goto again;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+ } else
+ VTNET_RXQ_UNLOCK(rxq);
+}
+
+static void
+vtnet_rxq_tq_intr(void *xrxq, int pending)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_rxq *rxq;
+ struct ifnet *ifp;
+ int more;
+
+ rxq = xrxq;
+ sc = rxq->vtnrx_sc;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_RXQ_LOCK(rxq);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
- vtnet_enable_rx_intr(sc);
- VTNET_UNLOCK(sc);
+ VTNET_RXQ_UNLOCK(rxq);
return;
}
- more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
- if (more || vtnet_enable_rx_intr(sc) != 0) {
+ more = vtnet_rxq_eof(rxq);
+ if (more || vtnet_rxq_enable_intr(rxq) != 0) {
if (!more)
- vtnet_disable_rx_intr(sc);
- sc->vtnet_stats.rx_task_rescheduled++;
- VTNET_UNLOCK(sc);
- goto again;
+ vtnet_rxq_disable_intr(rxq);
+ rxq->vtnrx_stats.vrxs_rescheduled++;
+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
}
- VTNET_UNLOCK(sc);
+ VTNET_RXQ_UNLOCK(rxq);
}
static void
-vtnet_txeof(struct vtnet_softc *sc)
+vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
struct virtqueue *vq;
- struct ifnet *ifp;
struct vtnet_tx_header *txhdr;
- int deq;
-
- vq = sc->vtnet_tx_vq;
- ifp = sc->vtnet_ifp;
- deq = 0;
+ int last;
- VTNET_LOCK_ASSERT(sc);
+ vq = txq->vtntx_vq;
+ last = 0;
- while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
- deq++;
- ifp->if_opackets++;
+ while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
m_freem(txhdr->vth_mbuf);
uma_zfree(vtnet_tx_header_zone, txhdr);
}
- if (deq > 0) {
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- if (virtqueue_empty(vq))
- sc->vtnet_watchdog_timer = 0;
- }
+ KASSERT(virtqueue_empty(vq),
+ ("%s: mbufs remaining in tx queue %p", __func__, txq));
}
-static struct mbuf *
-vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
- struct virtio_net_hdr *hdr)
+/*
+ * BMV: Much of this can go away once we finally have offsets in
+ * the mbuf packet header. Bug andre@.
+ */
+static int
+vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
+ int *etype, int *proto, int *start)
{
- struct ifnet *ifp;
- struct ether_header *eh;
+ struct vtnet_softc *sc;
struct ether_vlan_header *evh;
- struct ip *ip;
- struct ip6_hdr *ip6;
- struct tcphdr *tcp;
- int ip_offset;
- uint16_t eth_type, csum_start;
- uint8_t ip_proto, gso_type;
+ int offset;
- ifp = sc->vtnet_ifp;
+ sc = txq->vtntx_sc;
- ip_offset = sizeof(struct ether_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
+ evh = mtod(m, struct ether_vlan_header *);
+ if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ /* BMV: We should handle nested VLAN tags too. */
+ *etype = ntohs(evh->evl_proto);
+ offset = sizeof(struct ether_vlan_header);
+ } else {
+ *etype = ntohs(evh->evl_encap_proto);
+ offset = sizeof(struct ether_header);
+ }
+
+ switch (*etype) {
+#if defined(INET)
+ case ETHERTYPE_IP: {
+ struct ip *ip, iphdr;
+ if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
+ m_copydata(m, offset, sizeof(struct ip),
+ (caddr_t) &iphdr);
+ ip = &iphdr;
+ } else
+ ip = (struct ip *)(m->m_data + offset);
+ *proto = ip->ip_p;
+ *start = offset + (ip->ip_hl << 2);
+ break;
}
-
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- ip_offset = sizeof(struct ether_vlan_header);
- if (m->m_len < ip_offset) {
- if ((m = m_pullup(m, ip_offset)) == NULL)
- return (NULL);
- }
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
+#endif
+#if defined(INET6)
+ case ETHERTYPE_IPV6:
+ *proto = -1;
+ *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
+ /* Assert the network stack sent us a valid packet. */
+ KASSERT(*start > offset,
+ ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
+ *start, offset, *proto));
+ break;
+#endif
+ default:
+ sc->vtnet_stats.tx_csum_bad_ethtype++;
+ return (EINVAL);
}
- switch (eth_type) {
- case ETHERTYPE_IP:
- if (m->m_len < ip_offset + sizeof(struct ip)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip));
- if (m == NULL)
- return (NULL);
- }
+ return (0);
+}
- ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
- ip_proto = ip->ip_p;
- csum_start = ip_offset + (ip->ip_hl << 2);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- break;
+static int
+vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
+ int offset, struct virtio_net_hdr *hdr)
+{
+ static struct timeval lastecn;
+ static int curecn;
+ struct vtnet_softc *sc;
+ struct tcphdr *tcp, tcphdr;
- case ETHERTYPE_IPV6:
- if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
- m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+
+ if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
+ m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
+ tcp = &tcphdr;
+ } else
+ tcp = (struct tcphdr *)(m->m_data + offset);
+
+ hdr->hdr_len = offset + (tcp->th_off << 2);
+ hdr->gso_size = m->m_pkthdr.tso_segsz;
+ hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
+ VIRTIO_NET_HDR_GSO_TCPV6;
- ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
+ if (tcp->th_flags & TH_CWR) {
/*
- * XXX Assume no extension headers are present. Presently,
- * this will always be true in the case of TSO, and FreeBSD
- * does not perform checksum offloading of IPv6 yet.
+ * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
+ * ECN support is not on a per-interface basis, but globally via
+ * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
*/
- ip_proto = ip6->ip6_nxt;
- csum_start = ip_offset + sizeof(struct ip6_hdr);
- gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- break;
-
- default:
- return (m);
+ if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
+ if (ppsratecheck(&lastecn, &curecn, 1))
+ if_printf(sc->vtnet_ifp,
+ "TSO with ECN not negotiated with host\n");
+ return (ENOTSUP);
+ }
+ hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
}
- if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
- hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = csum_start;
- hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_tso++;
- sc->vtnet_stats.tx_csum_offloaded++;
- }
+ return (0);
+}
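
A worked example of the TSO fields set above, with hypothetical values for an untagged IPv4/TCP packet carrying no IP or TCP options:

	/*
	 * offset        = 14 (Ethernet) + 20 (IPv4) = 34
	 * tcp->th_off   = 5, so hdr->hdr_len = 34 + 5 * 4 = 54
	 * hdr->gso_size = m->m_pkthdr.tso_segsz (e.g. 1460 on a 1500 MTU path)
	 * hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4
	 */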
- if (m->m_pkthdr.csum_flags & CSUM_TSO) {
- if (ip_proto != IPPROTO_TCP)
- return (m);
+static struct mbuf *
+vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
+ struct virtio_net_hdr *hdr)
+{
+ struct vtnet_softc *sc;
+ int flags, etype, csum_start, proto, error;
- if (m->m_len < csum_start + sizeof(struct tcphdr)) {
- m = m_pullup(m, csum_start + sizeof(struct tcphdr));
- if (m == NULL)
- return (NULL);
- }
+ sc = txq->vtntx_sc;
+ flags = m->m_pkthdr.csum_flags;
- tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
- hdr->gso_type = gso_type;
- hdr->hdr_len = csum_start + (tcp->th_off << 2);
- hdr->gso_size = m->m_pkthdr.tso_segsz;
+ error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
+ if (error)
+ goto drop;
- if (tcp->th_flags & TH_CWR) {
- /*
- * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
- * ECN support is only configurable globally with the
- * net.inet.tcp.ecn.enable sysctl knob.
- */
- if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
- if_printf(ifp, "TSO with ECN not supported "
- "by host\n");
- m_freem(m);
- return (NULL);
- }
+ if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
+ (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
+ /*
+ * We could compare the IP protocol vs the CSUM_ flag too,
+ * but that really should not be necessary.
+ */
+ hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = csum_start;
+ hdr->csum_offset = m->m_pkthdr.csum_data;
+ txq->vtntx_stats.vtxs_csum++;
+ }
- hdr->flags |= VIRTIO_NET_HDR_GSO_ECN;
+ if (flags & CSUM_TSO) {
+ if (__predict_false(proto != IPPROTO_TCP)) {
+ /* Likely failed to correctly parse the mbuf. */
+ sc->vtnet_stats.tx_tso_not_tcp++;
+ goto drop;
}
- sc->vtnet_stats.tx_tso_offloaded++;
+ KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
+ ("%s: mbuf %p TSO without checksum offload", __func__, m));
+
+ error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
+ if (error)
+ goto drop;
}
return (m);
+
+drop:
+ m_freem(m);
+ return (NULL);
}
static int
-vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
+vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
struct vtnet_tx_header *txhdr)
{
struct sglist sg;
struct sglist_seg segs[VTNET_MAX_TX_SEGS];
+ struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m;
int collapsed, error;
- vq = sc->vtnet_tx_vq;
+ vq = txq->vtntx_vq;
+ sc = txq->vtntx_sc;
m = *m_head;
collapsed = 0;
sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
KASSERT(error == 0 && sg.sg_nseg == 1,
- ("%s: cannot add header to sglist error %d", __func__, error));
+ ("%s: error %d adding header to sglist", __func__, error));
again:
error = sglist_append_mbuf(&sg, m);
@@ -1878,12 +2065,14 @@ again:
*m_head = m;
collapsed = 1;
+ txq->vtntx_stats.vtxs_collapsed++;
goto again;
}
txhdr->vth_mbuf = m;
+ error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
- return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0));
+ return (error);
fail:
m_freem(*m_head);
@@ -1893,28 +2082,29 @@ fail:
}
static int
-vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
+vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
{
+ struct vtnet_softc *sc;
struct vtnet_tx_header *txhdr;
struct virtio_net_hdr *hdr;
struct mbuf *m;
int error;
+ sc = txq->vtntx_sc;
m = *m_head;
M_ASSERTPKTHDR(m);
txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
if (txhdr == NULL) {
- *m_head = NULL;
m_freem(m);
+ *m_head = NULL;
return (ENOMEM);
}
/*
- * Always use the non-mergeable header to simplify things. When
- * the mergeable feature is negotiated, the num_buffers field
- * must be set to zero. We use vtnet_hdr_size later to enqueue
- * the correct header size to the host.
+ * Always use the non-mergeable header, regardless of whether the feature
+ * was negotiated. For transmit, num_buffers is always zero. The
+ * vtnet_hdr_size is used to enqueue the correct header size.
*/
hdr = &txhdr->vth_uhdr.hdr;
@@ -1927,72 +2117,55 @@ vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
m->m_flags &= ~M_VLANTAG;
}
- if (m->m_pkthdr.csum_flags != 0) {
- m = vtnet_tx_offload(sc, m, hdr);
+ if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+ m = vtnet_txq_offload(txq, m, hdr);
if ((*m_head = m) == NULL) {
error = ENOBUFS;
goto fail;
}
}
- error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
+ error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
+ if (error == 0)
+ return (0);
+
fail:
- if (error)
- uma_zfree(vtnet_tx_header_zone, txhdr);
+ uma_zfree(vtnet_tx_header_zone, txhdr);
return (error);
}
-static void
-vtnet_start(struct ifnet *ifp)
-{
- struct vtnet_softc *sc;
-
- sc = ifp->if_softc;
-
- VTNET_LOCK(sc);
- vtnet_start_locked(ifp);
- VTNET_UNLOCK(sc);
-}
+#ifdef VTNET_LEGACY_TX
static void
-vtnet_start_locked(struct ifnet *ifp)
+vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
{
struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m0;
int enq;
- sc = ifp->if_softc;
- vq = sc->vtnet_tx_vq;
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
enq = 0;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_TXQ_LOCK_ASSERT(txq);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0))
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sc->vtnet_link_active == 0)
return;
-#ifdef VTNET_TX_INTR_MODERATION
- if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
- vtnet_txeof(sc);
-#endif
-
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- if (virtqueue_full(vq)) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (virtqueue_full(vq))
break;
- }
IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
if (m0 == NULL)
break;
- if (vtnet_encap(sc, &m0) != 0) {
- if (m0 == NULL)
- break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m0);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (vtnet_txq_encap(txq, &m0) != 0) {
+ if (m0 != NULL)
+ IFQ_DRV_PREPEND(&ifp->if_snd, m0);
break;
}
@@ -2002,65 +2175,529 @@ vtnet_start_locked(struct ifnet *ifp)
if (enq > 0) {
virtqueue_notify(vq);
- sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
}
}
static void
-vtnet_tick(void *xsc)
+vtnet_start(struct ifnet *ifp)
{
struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
- sc = xsc;
+ sc = ifp->if_softc;
+ txq = &sc->vtnet_txqs[0];
- VTNET_LOCK_ASSERT(sc);
-#ifdef VTNET_DEBUG
- virtqueue_dump(sc->vtnet_rx_vq);
- virtqueue_dump(sc->vtnet_tx_vq);
-#endif
+ VTNET_TXQ_LOCK(txq);
+ vtnet_start_locked(txq, ifp);
+ VTNET_TXQ_UNLOCK(txq);
+}
- vtnet_watchdog(sc);
- callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
+#else /* !VTNET_LEGACY_TX */
+
+static int
+vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
+{
+ struct vtnet_softc *sc;
+ struct virtqueue *vq;
+ struct buf_ring *br;
+ struct ifnet *ifp;
+ int enq, error;
+
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
+ br = txq->vtntx_br;
+ ifp = sc->vtnet_ifp;
+ enq = 0;
+ error = 0;
+
+ VTNET_TXQ_LOCK_ASSERT(txq);
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sc->vtnet_link_active == 0) {
+ if (m != NULL)
+ error = drbr_enqueue(ifp, br, m);
+ return (error);
+ }
+
+ if (m != NULL) {
+ error = drbr_enqueue(ifp, br, m);
+ if (error)
+ return (error);
+ }
+
+ while ((m = drbr_peek(ifp, br)) != NULL) {
+ error = vtnet_txq_encap(txq, &m);
+ if (error) {
+ if (m != NULL)
+ drbr_putback(ifp, br, m);
+ else
+ drbr_advance(ifp, br);
+ break;
+ }
+ drbr_advance(ifp, br);
+
+ enq++;
+ ETHER_BPF_MTAP(ifp, m);
+ }
+
+ if (enq > 0) {
+ virtqueue_notify(vq);
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ }
+
+ return (error);
+}
+
+static int
+vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ int i, npairs, error;
+
+ sc = ifp->if_softc;
+ npairs = sc->vtnet_act_vq_pairs;
+
+ if (m->m_flags & M_FLOWID)
+ i = m->m_pkthdr.flowid % npairs;
+ else
+ i = curcpu % npairs;
+
+ txq = &sc->vtnet_txqs[i];
+
+ if (VTNET_TXQ_TRYLOCK(txq) != 0) {
+ error = vtnet_txq_mq_start_locked(txq, m);
+ VTNET_TXQ_UNLOCK(txq);
+ } else {
+ error = drbr_enqueue(ifp, txq->vtntx_br, m);
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
+ }
+
+ return (error);
}
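
The queue selection above is a plain modular mapping; two hypothetical examples with vtnet_act_vq_pairs = 4:

	/*
	 * M_FLOWID set, flowid = 42:  42 % 4 = 2  -> vtnet_txqs[2]
	 * M_FLOWID clear, curcpu = 5:  5 % 4 = 1  -> vtnet_txqs[1]
	 *
	 * Since a flow carries a stable flowid (stamped on receive in
	 * vtnet_rxq_input()), its packets keep mapping to the same
	 * transmit queue and stay ordered.
	 */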
static void
-vtnet_tx_vq_intr(void *xsc)
+vtnet_txq_tq_deferred(void *xtxq, int pending)
{
struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+
+ txq = xtxq;
+ sc = txq->vtntx_sc;
+
+ VTNET_TXQ_LOCK(txq);
+ if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+#endif /* VTNET_LEGACY_TX */
+
+static void
+vtnet_txq_tq_intr(void *xtxq, int pending)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
struct ifnet *ifp;
- sc = xsc;
+ txq = xtxq;
+ sc = txq->vtntx_sc;
ifp = sc->vtnet_ifp;
-again:
- VTNET_LOCK(sc);
+ VTNET_TXQ_LOCK(txq);
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- VTNET_UNLOCK(sc);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ VTNET_TXQ_UNLOCK(txq);
return;
}
+
+ vtnet_txq_eof(txq);
+
+#ifdef VTNET_LEGACY_TX
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
#endif
+ if (vtnet_txq_enable_intr(txq) != 0) {
+ vtnet_txq_disable_intr(txq);
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
+ }
+
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+static void
+vtnet_txq_eof(struct vtnet_txq *txq)
+{
+ struct virtqueue *vq;
+ struct vtnet_tx_header *txhdr;
+ struct mbuf *m;
+
+ vq = txq->vtntx_vq;
+ VTNET_TXQ_LOCK_ASSERT(txq);
+
+ while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
+ m = txhdr->vth_mbuf;
+
+ txq->vtntx_stats.vtxs_opackets++;
+ txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
+ if (m->m_flags & M_MCAST)
+ txq->vtntx_stats.vtxs_omcasts++;
+
+ m_freem(m);
+ uma_zfree(vtnet_tx_header_zone, txhdr);
+ }
+
+ if (virtqueue_empty(vq))
+ txq->vtntx_watchdog = 0;
+}
+
+static void
+vtnet_tx_vq_intr(void *xtxq)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ struct ifnet *ifp;
+ int tries;
+
+ txq = xtxq;
+ sc = txq->vtntx_sc;
+ ifp = sc->vtnet_ifp;
+ tries = 0;
+
+ if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
+ /*
+ * Ignore this interrupt. Either this is a spurious interrupt
+ * or multiqueue without per-VQ MSIX, so every queue needs to
+ * be polled (a brain-dead configuration we could try harder
+ * to avoid).
+ */
+ vtnet_txq_disable_intr(txq);
+ return;
+ }
+
+again:
+ VTNET_TXQ_LOCK(txq);
+
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
- vtnet_enable_tx_intr(sc);
- VTNET_UNLOCK(sc);
+ VTNET_TXQ_UNLOCK(txq);
return;
}
- vtnet_txeof(sc);
+ vtnet_txq_eof(txq);
+#ifdef VTNET_LEGACY_TX
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- vtnet_start_locked(ifp);
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+#endif
- if (vtnet_enable_tx_intr(sc) != 0) {
- vtnet_disable_tx_intr(sc);
- sc->vtnet_stats.tx_task_rescheduled++;
- VTNET_UNLOCK(sc);
- goto again;
+ if (vtnet_txq_enable_intr(txq) != 0) {
+ vtnet_txq_disable_intr(txq);
+ /*
+ * This is an occasional race, so retry a few times
+ * before scheduling the taskqueue.
+ */
+ VTNET_TXQ_UNLOCK(txq);
+ if (tries++ < VTNET_INTR_DISABLE_RETRIES)
+ goto again;
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
+ } else
+ VTNET_TXQ_UNLOCK(txq);
+}
+
+static void
+vtnet_tx_start_all(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ struct vtnet_txq *txq;
+ int i;
+
+ ifp = sc->vtnet_ifp;
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+
+ VTNET_TXQ_LOCK(txq);
+#ifdef VTNET_LEGACY_TX
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ vtnet_start_locked(txq, ifp);
+#else
+ if (!drbr_empty(ifp, txq->vtntx_br))
+ vtnet_txq_mq_start_locked(txq, NULL);
+#endif
+ VTNET_TXQ_UNLOCK(txq);
+ }
+}
+
+#ifndef VTNET_LEGACY_TX
+static void
+vtnet_qflush(struct ifnet *ifp)
+{
+ struct vtnet_softc *sc;
+ struct vtnet_txq *txq;
+ struct mbuf *m;
+ int i;
+
+ sc = ifp->if_softc;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+
+ VTNET_TXQ_LOCK(txq);
+ while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
+ m_freem(m);
+ VTNET_TXQ_UNLOCK(txq);
+ }
+
+ if_qflush(ifp);
+}
+#endif
+
+static int
+vtnet_watchdog(struct vtnet_txq *txq)
+{
+ struct vtnet_softc *sc;
+
+ sc = txq->vtntx_sc;
+
+ VTNET_TXQ_LOCK(txq);
+ if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
+ VTNET_TXQ_UNLOCK(txq);
+ return (0);
+ }
+ VTNET_TXQ_UNLOCK(txq);
+
+ if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
+ txq->vtntx_id);
+ return (1);
+}
+
+static void
+vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
+{
+ struct vtnet_rxq_stats *st;
+
+ st = &rxq->vtnrx_stats;
+
+ accum->vrxs_ipackets += st->vrxs_ipackets;
+ accum->vrxs_ibytes += st->vrxs_ibytes;
+ accum->vrxs_iqdrops += st->vrxs_iqdrops;
+ accum->vrxs_ierrors += st->vrxs_ierrors;
+ accum->vrxs_csum += st->vrxs_csum;
+ accum->vrxs_csum_failed += st->vrxs_csum_failed;
+ accum->vrxs_rescheduled += st->vrxs_rescheduled;
+}
+
+static void
+vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
+{
+ struct vtnet_txq_stats *st;
+
+ st = &txq->vtntx_stats;
+
+ accum->vtxs_opackets += st->vtxs_opackets;
+ accum->vtxs_obytes += st->vtxs_obytes;
+ accum->vtxs_csum += st->vtxs_csum;
+ accum->vtxs_tso += st->vtxs_tso;
+ accum->vtxs_collapsed += st->vtxs_collapsed;
+ accum->vtxs_rescheduled += st->vtxs_rescheduled;
+}
+
+static void
+vtnet_accumulate_stats(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ struct vtnet_statistics *st;
+ struct vtnet_rxq_stats rxaccum;
+ struct vtnet_txq_stats txaccum;
+ int i;
+
+ ifp = sc->vtnet_ifp;
+ st = &sc->vtnet_stats;
+ bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
+ bzero(&txaccum, sizeof(struct vtnet_txq_stats));
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
+ vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
+ }
+
+ st->rx_csum_offloaded = rxaccum.vrxs_csum;
+ st->rx_csum_failed = rxaccum.vrxs_csum_failed;
+ st->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
+ st->tx_csum_offloaded = txaccum.vtxs_csum;
+ st->tx_tso_offloaded = txaccum.vtxs_tso;
+ st->tx_task_rescheduled = txaccum.vtxs_rescheduled;
+
+ /*
+ * With the exception of if_ierrors, these ifnet statistics are
+ * only updated in the driver, so just set them to our accumulated
+ * values. if_ierrors is updated in ether_input() for malformed
+ * frames that we should have already discarded.
+ */
+ ifp->if_ipackets = rxaccum.vrxs_ipackets;
+ ifp->if_iqdrops = rxaccum.vrxs_iqdrops;
+ ifp->if_ierrors = rxaccum.vrxs_ierrors;
+ ifp->if_opackets = txaccum.vtxs_opackets;
+#ifndef VTNET_LEGACY_TX
+ ifp->if_obytes = txaccum.vtxs_obytes;
+ ifp->if_omcasts = txaccum.vtxs_omcasts;
+#endif
+}
+
+static void
+vtnet_tick(void *xsc)
+{
+ struct vtnet_softc *sc;
+ struct ifnet *ifp;
+ int i, timedout;
+
+ sc = xsc;
+ ifp = sc->vtnet_ifp;
+ timedout = 0;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+ vtnet_accumulate_stats(sc);
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
+
+ if (timedout != 0) {
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ vtnet_init_locked(sc);
+ } else
+ callout_schedule(&sc->vtnet_tick_ch, hz);
+}
+
+static void
+vtnet_start_taskqueues(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i, error;
+
+ dev = sc->vtnet_dev;
+
+ /*
+ * Errors here are very difficult to recover from - we cannot
+ * easily fail because, if this is during boot, we will hang
+ * when freeing any successfully started taskqueues because
+ * the scheduler isn't up yet.
+ *
+ * Most drivers just ignore the return value - it only fails
+ * with ENOMEM so an error is not likely.
+ */
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
+ "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
+ if (error) {
+ device_printf(dev, "failed to start rx taskq %d\n",
+ rxq->vtnrx_id);
+ }
+
+ txq = &sc->vtnet_txqs[i];
+ error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
+ "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
+ if (error) {
+ device_printf(dev, "failed to start tx taskq %d\n",
+ txq->vtntx_id);
+ }
+ }
+}
+
+static void
+vtnet_free_taskqueues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ if (rxq->vtnrx_tq != NULL) {
+ taskqueue_free(rxq->vtnrx_tq);
+ rxq->vtnrx_tq = NULL;
+ }
+
+ txq = &sc->vtnet_txqs[i];
+ if (txq->vtntx_tq != NULL) {
+ taskqueue_free(txq->vtntx_tq);
+ txq->vtntx_tq = NULL;
+ }
}
+}
- VTNET_UNLOCK(sc);
+static void
+vtnet_drain_taskqueues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ if (rxq->vtnrx_tq != NULL)
+ taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
+
+ txq = &sc->vtnet_txqs[i];
+ if (txq->vtntx_tq != NULL) {
+ taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
+#ifndef VTNET_LEGACY_TX
+ taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
+#endif
+ }
+ }
+}
+
+static void
+vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ vtnet_rxq_free_mbufs(rxq);
+
+ txq = &sc->vtnet_txqs[i];
+ vtnet_txq_free_mbufs(txq);
+ }
+}
+
+static void
+vtnet_stop_rendezvous(struct vtnet_softc *sc)
+{
+ struct vtnet_rxq *rxq;
+ struct vtnet_txq *txq;
+ int i;
+
+ /*
+ * Lock and unlock the per-queue mutex so we know the stop
+ * state is visible. Doing only the active queues should be
+ * sufficient, but it does not cost much extra to do all the
+ * queues. Note we hold the core mutex here too.
+ */
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+ VTNET_RXQ_LOCK(rxq);
+ VTNET_RXQ_UNLOCK(rxq);
+
+ txq = &sc->vtnet_txqs[i];
+ VTNET_TXQ_LOCK(txq);
+ VTNET_TXQ_UNLOCK(txq);
+ }
}
static void
@@ -2072,46 +2709,60 @@ vtnet_stop(struct vtnet_softc *sc)
dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
- sc->vtnet_watchdog_timer = 0;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ sc->vtnet_link_active = 0;
callout_stop(&sc->vtnet_tick_ch);
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
+ /* Only advisory. */
+ vtnet_disable_interrupts(sc);
/*
- * Stop the host VirtIO adapter. Note this will reset the host
- * adapter's state back to the pre-initialized state, so in
- * order to make the device usable again, we must drive it
- * through virtio_reinit() and virtio_reinit_complete().
+ * Stop the host adapter. This resets it to the pre-initialized
+ * state. It will not generate any interrupts until after it is
+ * reinitialized.
*/
virtio_stop(dev);
+ vtnet_stop_rendezvous(sc);
- sc->vtnet_flags &= ~VTNET_FLAG_LINK;
-
- vtnet_free_rx_mbufs(sc);
- vtnet_free_tx_mbufs(sc);
+ /* Free any mbufs left in the virtqueues. */
+ vtnet_drain_rxtx_queues(sc);
}
static int
-vtnet_reinit(struct vtnet_softc *sc)
+vtnet_virtio_reinit(struct vtnet_softc *sc)
{
+ device_t dev;
struct ifnet *ifp;
uint64_t features;
+ int mask, error;
+ dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
features = sc->vtnet_features;
+ mask = 0;
+#if defined(INET)
+ mask |= IFCAP_RXCSUM;
+#endif
+#if defined(INET6)
+ mask |= IFCAP_RXCSUM_IPV6;
+#endif
+
/*
* Re-negotiate with the host, removing any disabled receive
* features. Transmit features are disabled only on our side
* via if_capenable and if_hwassist.
*/
- if (ifp->if_capabilities & IFCAP_RXCSUM) {
- if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
+ if (ifp->if_capabilities & mask) {
+ /*
+ * We require both IPv4 and IPv6 offloading to be enabled
+ * in order to negotiate it: VirtIO does not distinguish
+ * between the two.
+ */
+ if ((ifp->if_capenable & mask) != mask)
features &= ~VIRTIO_NET_F_GUEST_CSUM;
}
@@ -2125,86 +2776,205 @@ vtnet_reinit(struct vtnet_softc *sc)
features &= ~VIRTIO_NET_F_CTRL_VLAN;
}
- return (virtio_reinit(sc->vtnet_dev, features));
+ error = virtio_reinit(dev, features);
+ if (error)
+ device_printf(dev, "virtio reinit error %d\n", error);
+
+ return (error);
}
static void
-vtnet_init_locked(struct vtnet_softc *sc)
+vtnet_init_rx_filters(struct vtnet_softc *sc)
{
- device_t dev;
struct ifnet *ifp;
- int error;
- dev = sc->vtnet_dev;
ifp = sc->vtnet_ifp;
- VTNET_LOCK_ASSERT(sc);
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
+ /* Restore promiscuous and all-multicast modes. */
+ vtnet_rx_filter(sc);
+ /* Restore filtered MAC addresses. */
+ vtnet_rx_filter_mac(sc);
+ }
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ vtnet_rx_filter_vlan(sc);
+}
+
+static int
+vtnet_init_rx_queues(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct vtnet_rxq *rxq;
+ int i, clsize, error;
+
+ dev = sc->vtnet_dev;
+
+ /*
+ * Use the new cluster size if one has been set (via an MTU
+ * change). Otherwise, use the standard 2K clusters.
+ *
+ * BMV: It might make sense to use page sized clusters as
+ * the default (depending on the features negotiated).
+ */
+ if (sc->vtnet_rx_new_clsize != 0) {
+ clsize = sc->vtnet_rx_new_clsize;
+ sc->vtnet_rx_new_clsize = 0;
+ } else
+ clsize = MCLBYTES;
+
+ sc->vtnet_rx_clsize = clsize;
+ sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
+
+ /* The first segment is reserved for the header. */
+ KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS,
+ ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs));
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ rxq = &sc->vtnet_rxqs[i];
+
+ /* Hold the lock to satisfy asserts. */
+ VTNET_RXQ_LOCK(rxq);
+ error = vtnet_rxq_populate(rxq);
+ VTNET_RXQ_UNLOCK(rxq);
+
+ if (error) {
+ device_printf(dev,
+ "cannot allocate mbufs for Rx queue %d\n", i);
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+static int
+vtnet_init_tx_queues(struct vtnet_softc *sc)
+{
+ struct vtnet_txq *txq;
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+ txq = &sc->vtnet_txqs[i];
+ txq->vtntx_watchdog = 0;
+ }
+
+ return (0);
+}
+
+static int
+vtnet_init_rxtx_queues(struct vtnet_softc *sc)
+{
+ int error;
+
+ error = vtnet_init_rx_queues(sc);
+ if (error)
+ return (error);
+
+ error = vtnet_init_tx_queues(sc);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+static void
+vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int npairs;
+
+ dev = sc->vtnet_dev;
+
+ if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
+ MPASS(sc->vtnet_max_vq_pairs == 1);
+ sc->vtnet_act_vq_pairs = 1;
return;
+ }
- /* Stop host's adapter, cancel any pending I/O. */
- vtnet_stop(sc);
+ /* BMV: Just use the maximum configured for now. */
+ npairs = sc->vtnet_max_vq_pairs;
- /* Reinitialize the host device. */
- error = vtnet_reinit(sc);
- if (error) {
+ if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
device_printf(dev,
- "reinitialization failed, stopping device...\n");
- vtnet_stop(sc);
- return;
+ "cannot set active queue pairs to %d\n", npairs);
+ npairs = 1;
}
- /* Update host with assigned MAC address. */
+ sc->vtnet_act_vq_pairs = npairs;
+}
+
+static int
+vtnet_reinit(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+ int error;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ /* Use the current MAC address. */
bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
vtnet_set_hwaddr(sc);
+ vtnet_set_active_vq_pairs(sc);
+
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_TSO;
+ if (ifp->if_capenable & IFCAP_TSO6)
+ ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */
- error = vtnet_init_rx_vq(sc);
- if (error) {
- device_printf(dev,
- "cannot allocate mbufs for Rx virtqueue\n");
- vtnet_stop(sc);
- return;
- }
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
+ vtnet_init_rx_filters(sc);
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
- /* Restore promiscuous and all-multicast modes. */
- vtnet_rx_filter(sc);
+ error = vtnet_init_rxtx_queues(sc);
+ if (error)
+ return (error);
- /* Restore filtered MAC addresses. */
- vtnet_rx_filter_mac(sc);
- }
+ vtnet_enable_interrupts(sc);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
- /* Restore VLAN filters. */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
- vtnet_rx_filter_vlan(sc);
- }
+ return (0);
+}
-#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
- vtnet_disable_rx_intr(sc);
- vtnet_disable_tx_intr(sc);
- } else
-#endif
- {
- vtnet_enable_rx_intr(sc);
- vtnet_enable_tx_intr(sc);
- }
+static void
+vtnet_init_locked(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ return;
+
+ vtnet_stop(sc);
+
+ /* Reinitialize with the host. */
+ if (vtnet_virtio_reinit(sc) != 0)
+ goto fail;
+
+ if (vtnet_reinit(sc) != 0)
+ goto fail;
virtio_reinit_complete(dev);
vtnet_update_link_status(sc);
callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
+
+ return;
+
+fail:
+ vtnet_stop(sc);
}
static void
@@ -2214,9 +2984,24 @@ vtnet_init(void *xsc)
sc = xsc;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
vtnet_init_locked(sc);
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
+}
+
+static void
+vtnet_free_ctrl_vq(struct vtnet_softc *sc)
+{
+ struct virtqueue *vq;
+
+ vq = sc->vtnet_ctrl_vq;
+
+ /*
+ * The control virtqueue is only polled and therefore it should
+ * already be empty.
+ */
+ KASSERT(virtqueue_empty(vq),
+ ("%s: ctrl vq %p not empty", __func__, vq));
}
static void
@@ -2224,87 +3009,117 @@ vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
struct sglist *sg, int readable, int writable)
{
struct virtqueue *vq;
- void *c;
vq = sc->vtnet_ctrl_vq;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
- ("no control virtqueue"));
- KASSERT(virtqueue_empty(vq),
- ("control command already enqueued"));
+ ("%s: CTRL_VQ feature not negotiated", __func__));
+ if (!virtqueue_empty(vq))
+ return;
if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
return;
- virtqueue_notify(vq);
-
/*
- * Poll until the command is complete. Previously, we would
- * sleep until the control virtqueue interrupt handler woke
- * us up, but dropping the VTNET_MTX leads to serialization
- * difficulties.
- *
- * Furthermore, it appears QEMU/KVM only allocates three MSIX
- * vectors. Two of those vectors are needed for the Rx and Tx
- * virtqueues. We do not support sharing both a Vq and config
- * changed notification on the same MSIX vector.
+ * Poll for the response, but the command is likely already
+ * done when we return from the notify.
*/
- c = virtqueue_poll(vq, NULL);
- KASSERT(c == cookie, ("unexpected control command response"));
+ virtqueue_notify(vq);
+ virtqueue_poll(vq, NULL);
}
-static void
-vtnet_rx_filter(struct vtnet_softc *sc)
+static int
+vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
{
- device_t dev;
- struct ifnet *ifp;
+ struct virtio_net_ctrl_hdr hdr;
+ struct sglist_seg segs[3];
+ struct sglist sg;
+ uint8_t ack;
+ int error;
- dev = sc->vtnet_dev;
- ifp = sc->vtnet_ifp;
+ hdr.class = VIRTIO_NET_CTRL_MAC;
+ hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
+ ack = VIRTIO_NET_ERR;
- VTNET_LOCK_ASSERT(sc);
- KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
- ("CTRL_RX feature not negotiated"));
+ sglist_init(&sg, 3, segs);
+ error = 0;
+ error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
+ error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ KASSERT(error == 0 && sg.sg_nseg == 3,
+ ("%s: error %d adding set MAC msg to sglist", __func__, error));
- if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
- device_printf(dev, "cannot %s promiscuous mode\n",
- ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
+ vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
- if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
- device_printf(dev, "cannot %s all-multicast mode\n",
- ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
+ return (ack == VIRTIO_NET_OK ? 0 : EIO);
}
static int
-vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
+vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
{
- struct virtio_net_ctrl_hdr hdr;
struct sglist_seg segs[3];
struct sglist sg;
- uint8_t onoff, ack;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ struct virtio_net_ctrl_mq mq;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
int error;
- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
- return (ENOTSUP);
+ s.hdr.class = VIRTIO_NET_CTRL_MQ;
+ s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+ s.mq.virtqueue_pairs = npairs;
+ s.ack = VIRTIO_NET_ERR;
+ sglist_init(&sg, 3, segs);
error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
+ KASSERT(error == 0 && sg.sg_nseg == 3,
+ ("%s: error %d adding MQ message to sglist", __func__, error));
- hdr.class = VIRTIO_NET_CTRL_RX;
- hdr.cmd = cmd;
- onoff = !!on;
- ack = VIRTIO_NET_ERR;
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
+
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
+}
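
A sketch of the descriptor layout the device sees for the MQ command, assuming the usual VirtIO convention that the readable segments (here sg.sg_nseg - 1 = 2) are driver-to-device and the final writable segment is device-to-driver:

	/*
	 * seg 0: s.hdr  class/cmd (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET)   readable
	 * seg 1: s.mq   virtqueue_pairs = npairs                      readable
	 * seg 2: s.ack  device writes VIRTIO_NET_OK or VIRTIO_NET_ERR writable
	 */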
+
+static int
+vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
+{
+ struct sglist_seg segs[3];
+ struct sglist sg;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ uint8_t onoff;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
+ int error;
+
+ KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
+ ("%s: CTRL_RX feature not negotiated", __func__));
+
+ s.hdr.class = VIRTIO_NET_CTRL_RX;
+ s.hdr.cmd = cmd;
+ s.onoff = !!on;
+ s.ack = VIRTIO_NET_ERR;
sglist_init(&sg, 3, segs);
- error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
- error |= sglist_append(&sg, &onoff, sizeof(uint8_t));
- error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 3,
- ("error adding Rx filter message to sglist"));
+ ("%s: error %d adding Rx message to sglist", __func__, error));
- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
- return (ack == VIRTIO_NET_OK ? 0 : EIO);
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
}
static int
@@ -2321,6 +3136,48 @@ vtnet_set_allmulti(struct vtnet_softc *sc, int on)
return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
}
+/*
+ * The device defaults to promiscuous mode for backwards compatibility.
+ * Turn it off at attach time if possible.
+ */
+static void
+vtnet_attach_disable_promisc(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK(sc);
+ if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
+ ifp->if_flags |= IFF_PROMISC;
+ } else if (vtnet_set_promisc(sc, 0) != 0) {
+ ifp->if_flags |= IFF_PROMISC;
+ device_printf(sc->vtnet_dev,
+ "cannot disable default promiscuous mode\n");
+ }
+ VTNET_CORE_UNLOCK(sc);
+}
+
+static void
+vtnet_rx_filter(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+
+ if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
+ device_printf(dev, "cannot %s promiscuous mode\n",
+ ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
+
+ if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
+ device_printf(dev, "cannot %s all-multicast mode\n",
+ ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
+}
+
static void
vtnet_rx_filter_mac(struct vtnet_softc *sc)
{
@@ -2340,19 +3197,23 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
mcnt = 0;
promisc = 0;
allmulti = 0;
- error = 0;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
- ("CTRL_RX feature not negotiated"));
+ ("%s: CTRL_RX feature not negotiated", __func__));
/* Unicast MAC addresses: */
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_LINK)
continue;
- else if (ucnt == VTNET_MAX_MAC_ENTRIES)
+ else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
+ continue;
+ else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
+ promisc = 1;
break;
+ }
bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
&filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
@@ -2360,10 +3221,8 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
}
if_addr_runlock(ifp);
- if (ucnt >= VTNET_MAX_MAC_ENTRIES) {
- promisc = 1;
+ if (promisc != 0) {
filter->vmf_unicast.nentries = 0;
-
if_printf(ifp, "more than %d MAC addresses assigned, "
"falling back to promiscuous mode\n",
VTNET_MAX_MAC_ENTRIES);
@@ -2375,8 +3234,10 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- else if (mcnt == VTNET_MAX_MAC_ENTRIES)
+ else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
+ allmulti = 1;
break;
+ }
bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
&filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
@@ -2384,17 +3245,15 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
}
if_maddr_runlock(ifp);
- if (mcnt >= VTNET_MAX_MAC_ENTRIES) {
- allmulti = 1;
+ if (allmulti != 0) {
filter->vmf_multicast.nentries = 0;
-
if_printf(ifp, "more than %d multicast MAC addresses "
"assigned, falling back to all-multicast mode\n",
VTNET_MAX_MAC_ENTRIES);
} else
filter->vmf_multicast.nentries = mcnt;
- if (promisc && allmulti)
+ if (promisc != 0 && allmulti != 0)
goto out;
hdr.class = VIRTIO_NET_CTRL_MAC;
@@ -2402,6 +3261,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
ack = VIRTIO_NET_ERR;
sglist_init(&sg, 4, segs);
+ error = 0;
error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
error |= sglist_append(&sg, &filter->vmf_unicast,
sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
@@ -2409,7 +3269,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
error |= sglist_append(&sg, &ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 4,
- ("error adding MAC filtering message to sglist"));
+ ("%s: error %d adding MAC filter msg to sglist", __func__, error));
vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
@@ -2417,111 +3277,99 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc)
if_printf(ifp, "error setting host MAC filter table\n");
out:
- if (promisc)
- if (vtnet_set_promisc(sc, 1) != 0)
- if_printf(ifp, "cannot enable promiscuous mode\n");
- if (allmulti)
- if (vtnet_set_allmulti(sc, 1) != 0)
- if_printf(ifp, "cannot enable all-multicast mode\n");
+ if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
+ if_printf(ifp, "cannot enable promiscuous mode\n");
+ if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
+ if_printf(ifp, "cannot enable all-multicast mode\n");
}
static int
vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
{
- struct virtio_net_ctrl_hdr hdr;
struct sglist_seg segs[3];
struct sglist sg;
- uint8_t ack;
+ struct {
+ struct virtio_net_ctrl_hdr hdr;
+ uint8_t pad1;
+ uint16_t tag;
+ uint8_t pad2;
+ uint8_t ack;
+ } s;
int error;
- hdr.class = VIRTIO_NET_CTRL_VLAN;
- hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
- ack = VIRTIO_NET_ERR;
- error = 0;
+ s.hdr.class = VIRTIO_NET_CTRL_VLAN;
+ s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
+ s.tag = tag;
+ s.ack = VIRTIO_NET_ERR;
sglist_init(&sg, 3, segs);
- error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
- error |= sglist_append(&sg, &tag, sizeof(uint16_t));
- error |= sglist_append(&sg, &ack, sizeof(uint8_t));
+ error = 0;
+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
+ error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
KASSERT(error == 0 && sg.sg_nseg == 3,
- ("error adding VLAN control message to sglist"));
+ ("%s: error %d adding VLAN message to sglist", __func__, error));
- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
- return (ack == VIRTIO_NET_OK ? 0 : EIO);
+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
}
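
Everything the command touches lives in the single on-stack struct s, so the header, tag, and ack segments all stay valid while vtnet_exec_ctrl_cmd() blocks for the host's reply; the call passes sg.sg_nseg - 1 device-readable segments plus one device-writable segment for the ack. A standalone userland sketch of that readable-then-writable split, with assumed values and hypothetical names (not driver code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	const void	*base;
	size_t		 len;
	int		 writable;
};

int
main(void)
{
	uint8_t hdr[2] = { 2, 0 };	/* class, cmd: values assumed */
	uint16_t tag = 100;
	uint8_t ack = 0xFF;		/* host overwrites with OK/ERR */
	struct seg sg[] = {
		{ hdr,  sizeof(hdr), 0 },	/* device-readable */
		{ &tag, sizeof(tag), 0 },	/* device-readable */
		{ &ack, sizeof(ack), 1 },	/* device-writable reply */
	};
	size_t i;

	for (i = 0; i < sizeof(sg) / sizeof(sg[0]); i++)
		printf("seg %zu: %zu byte(s), %s\n", i, sg[i].len,
		    sg[i].writable ? "writable" : "readable");
	return (0);
}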
static void
vtnet_rx_filter_vlan(struct vtnet_softc *sc)
{
- device_t dev;
- uint32_t w, mask;
+ uint32_t w;
uint16_t tag;
- int i, nvlans, error;
+ int i, bit;
- VTNET_LOCK_ASSERT(sc);
+ VTNET_CORE_LOCK_ASSERT(sc);
KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
- ("VLAN_FILTER feature not negotiated"));
+ ("%s: VLAN_FILTER feature not negotiated", __func__));
- dev = sc->vtnet_dev;
- nvlans = sc->vtnet_nvlans;
- error = 0;
+ /* Enable the filter for each configured VLAN. */
+ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
+ w = sc->vtnet_vlan_filter[i];
+
+ while ((bit = ffs(w) - 1) != -1) {
+ w &= ~(1 << bit);
+ tag = sizeof(w) * CHAR_BIT * i + bit;
- /* Enable filtering for each configured VLAN. */
- for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
- w = sc->vtnet_vlan_shadow[i];
- for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) {
- if ((w & mask) != 0) {
- w &= ~mask;
- nvlans--;
- if (vtnet_exec_vlan_filter(sc, 1, tag) != 0)
- error++;
+ if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
+ device_printf(sc->vtnet_dev,
+ "cannot enable VLAN %d filter\n", tag);
}
}
}
-
- KASSERT(nvlans == 0, ("VLAN count incorrect"));
- if (error)
- device_printf(dev, "cannot restore VLAN filter table\n");
}
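
The loop walks each 32-bit filter word with ffs(3), clearing one set bit per iteration and recovering the VLAN tag as 32 * i + bit. A minimal userland sketch of the same traversal (illustrative filter contents only):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs(3) */

int
main(void)
{
	/* Bits 1 and 10 of word 0 and bit 3 of word 1: VLANs 1, 10, 35. */
	uint32_t filter[2] = { (1u << 1) | (1u << 10), (1u << 3) };
	int i, bit;

	for (i = 0; i < 2; i++) {
		uint32_t w = filter[i];

		while ((bit = ffs(w) - 1) != -1) {
			w &= ~(1u << bit);
			printf("enable VLAN %d\n", 32 * i + bit);
		}
	}
	return (0);
}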
static void
-vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
+vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
{
struct ifnet *ifp;
int idx, bit;
- KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
- ("VLAN_FILTER feature not negotiated"));
-
- if ((tag == 0) || (tag > 4095))
- return;
-
ifp = sc->vtnet_ifp;
idx = (tag >> 5) & 0x7F;
bit = tag & 0x1F;
- VTNET_LOCK(sc);
+ if (tag == 0 || tag > 4095)
+ return;
+
+ VTNET_CORE_LOCK(sc);
- /* Update shadow VLAN table. */
- if (add) {
- sc->vtnet_nvlans++;
- sc->vtnet_vlan_shadow[idx] |= (1 << bit);
- } else {
- sc->vtnet_nvlans--;
- sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
- }
+ if (add)
+ sc->vtnet_vlan_filter[idx] |= (1 << bit);
+ else
+ sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
- if (vtnet_exec_vlan_filter(sc, add, tag) != 0) {
- device_printf(sc->vtnet_dev,
- "cannot %s VLAN %d %s the host filter table\n",
- add ? "add" : "remove", tag,
- add ? "to" : "from");
- }
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
+ vtnet_exec_vlan_filter(sc, add, tag) != 0) {
+ device_printf(sc->vtnet_dev,
+ "cannot %s VLAN %d %s the host filter table\n",
+ add ? "add" : "remove", tag, add ? "to" : "from");
}
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
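
The shadow table stores one bit per VLAN ID in 32-bit words: word index tag >> 5, bit index tag & 0x1F. A tiny sketch of the same set/clear math (table size assumed to match the 4096 possible IDs):

#include <stdint.h>
#include <stdio.h>

#define VLAN_NWORDS	(4096 / 32)	/* one bit per possible VLAN ID */

static uint32_t table[VLAN_NWORDS];

static void
vlan_update(uint16_t tag, int add)
{
	int idx = (tag >> 5) & 0x7F;	/* word index, 0..127 */
	int bit = tag & 0x1F;		/* bit within that word */

	if (add)
		table[idx] |= 1u << bit;
	else
		table[idx] &= ~(1u << bit);
}

int
main(void)
{

	vlan_update(100, 1);
	printf("word %d = 0x%08x\n", 100 >> 5, table[100 >> 5]);
	return (0);
}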
static void
@@ -2531,7 +3379,7 @@ vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
if (ifp->if_softc != arg)
return;
- vtnet_set_vlan_filter(arg, 1, tag);
+ vtnet_update_vlan_filter(arg, 1, tag);
}
static void
@@ -2541,7 +3389,47 @@ vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
if (ifp->if_softc != arg)
return;
- vtnet_set_vlan_filter(arg, 0, tag);
+ vtnet_update_vlan_filter(arg, 0, tag);
+}
+
+static int
+vtnet_is_link_up(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct ifnet *ifp;
+ uint16_t status;
+
+ dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
+
+ if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
+ status = VIRTIO_NET_S_LINK_UP;
+ else
+ status = virtio_read_dev_config_2(dev,
+ offsetof(struct virtio_net_config, status));
+
+ return ((status & VIRTIO_NET_S_LINK_UP) != 0);
+}
+
+static void
+vtnet_update_link_status(struct vtnet_softc *sc)
+{
+ struct ifnet *ifp;
+ int link;
+
+ ifp = sc->vtnet_ifp;
+
+ VTNET_CORE_LOCK_ASSERT(sc);
+ link = vtnet_is_link_up(sc);
+
+ /* Notify if the link status has changed. */
+ if (link != 0 && sc->vtnet_link_active == 0) {
+ sc->vtnet_link_active = 1;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else if (link == 0 && sc->vtnet_link_active != 0) {
+ sc->vtnet_link_active = 0;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ }
}
static int
@@ -2569,112 +3457,334 @@ vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
- VTNET_LOCK(sc);
+ VTNET_CORE_LOCK(sc);
if (vtnet_is_link_up(sc) != 0) {
ifmr->ifm_status |= IFM_ACTIVE;
ifmr->ifm_active |= VTNET_MEDIATYPE;
} else
ifmr->ifm_active |= IFM_NONE;
- VTNET_UNLOCK(sc);
+ VTNET_CORE_UNLOCK(sc);
}
static void
-vtnet_add_statistics(struct vtnet_softc *sc)
+vtnet_set_hwaddr(struct vtnet_softc *sc)
{
device_t dev;
- struct vtnet_statistics *stats;
- struct sysctl_ctx_list *ctx;
+
+ dev = sc->vtnet_dev;
+
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
+ if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
+ device_printf(dev, "unable to set MAC address\n");
+ } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
+ virtio_write_device_config(dev,
+ offsetof(struct virtio_net_config, mac),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+ }
+}
+
+static void
+vtnet_get_hwaddr(struct vtnet_softc *sc)
+{
+ device_t dev;
+
+ dev = sc->vtnet_dev;
+
+ if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
+ /*
+ * Generate a random locally administered unicast address.
+ *
+ * It would be nice to generate the same MAC address across
+ * reboots, but it seems all the hosts currently available
+ * support the MAC feature, so this isn't too important.
+ */
+ sc->vtnet_hwaddr[0] = 0xB2;
+ arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
+ vtnet_set_hwaddr(sc);
+ return;
+ }
+
+ virtio_read_device_config(dev, offsetof(struct virtio_net_config, mac),
+ sc->vtnet_hwaddr, ETHER_ADDR_LEN);
+}
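
The 0xB2 first octet sets the locally administered bit (0x02) and leaves the multicast bit (0x01) clear, so the generated address is a valid random unicast MAC. A userland sketch of the same generation, assuming arc4random_buf(3) as found in FreeBSD libc:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>	/* arc4random_buf(3) on FreeBSD */

#define ETHER_ADDR_LEN	6

int
main(void)
{
	uint8_t mac[ETHER_ADDR_LEN];

	/* 0xB2: locally administered (0x02) set, multicast (0x01) clear. */
	mac[0] = 0xB2;
	arc4random_buf(&mac[1], ETHER_ADDR_LEN - 1);

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return (0);
}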
+
+static void
+vtnet_vlan_tag_remove(struct mbuf *m)
+{
+ struct ether_vlan_header *evh;
+
+ evh = mtod(m, struct ether_vlan_header *);
+ m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
+ m->m_flags |= M_VLANTAG;
+
+ /* Strip the 802.1Q header. */
+ bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
+ ETHER_HDR_LEN - ETHER_TYPE_LEN);
+ m_adj(m, ETHER_VLAN_ENCAP_LEN);
+}
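
The bcopy() slides the 12 bytes of destination and source address forward over the 4-byte 802.1Q shim, and m_adj() then trims the now-dead leading bytes. A flat-buffer sketch of the same strip (illustrative, not mbuf code):

#include <stdio.h>
#include <string.h>

#define ETHER_HDR_LEN		14
#define ETHER_TYPE_LEN		2
#define ETHER_VLAN_ENCAP_LEN	4

/* Strip the 802.1Q shim from a flat frame; returns the new frame start. */
static unsigned char *
vlan_strip(unsigned char *frame, unsigned short *tag)
{

	/* The tag control info sits at bytes 14-15 of the VLAN header. */
	*tag = (frame[14] << 8) | frame[15];

	/* Slide dst + src addresses (12 bytes) forward over the shim. */
	memmove(frame + ETHER_VLAN_ENCAP_LEN, frame,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	return (frame + ETHER_VLAN_ENCAP_LEN);
}

int
main(void)
{
	unsigned char f[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
	    0x81, 0x00, 0x00, 0x64, 0x08, 0x00 };	/* tag 100, IPv4 */
	unsigned short tag;
	unsigned char *p = vlan_strip(f, &tag);

	printf("tag %u, inner type 0x%02x%02x\n", tag, p[12], p[13]);
	return (0);
}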
+
+static void
+vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
+{
+ struct sysctl_oid *node;
+ struct sysctl_oid_list *list;
+ struct vtnet_rxq_stats *stats;
+ char namebuf[16];
+
+ snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
+ node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "Receive Queue");
+ list = SYSCTL_CHILDREN(node);
+
+ stats = &rxq->vtnrx_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
+ &stats->vrxs_ipackets, "Receive packets");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
+ &stats->vrxs_ibytes, "Receive bytes");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
+ &stats->vrxs_iqdrops, "Receive drops");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
+ &stats->vrxs_ierrors, "Receive errors");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ &stats->vrxs_csum, "Receive checksum offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
+ &stats->vrxs_csum_failed, "Receive checksum offload failed");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ &stats->vrxs_rescheduled,
+ "Receive interrupt handler rescheduled");
+}
+
+static void
+vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_txq *txq)
+{
+ struct sysctl_oid *node;
+ struct sysctl_oid_list *list;
+ struct vtnet_txq_stats *stats;
+ char namebuf[16];
+
+ snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
+ node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "Transmit Queue");
+ list = SYSCTL_CHILDREN(node);
+
+ stats = &txq->vtntx_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
+ &stats->vtxs_opackets, "Transmit packets");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
+ &stats->vtxs_obytes, "Transmit bytes");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
+ &stats->vtxs_omcasts, "Transmit multicasts");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
+ &stats->vtxs_csum, "Transmit checksum offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
+ &stats->vtxs_tso, "Transmit segmentation offloaded");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD,
+ &stats->vtxs_collapsed, "Transmit mbufs collapsed");
+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
+ &stats->vtxs_rescheduled,
+ "Transmit interrupt handler rescheduled");
+}
+
+static void
+vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct sysctl_ctx_list *ctx;
struct sysctl_oid *tree;
struct sysctl_oid_list *child;
+ int i;
dev = sc->vtnet_dev;
- stats = &sc->vtnet_stats;
ctx = device_get_sysctl_ctx(dev);
tree = device_get_sysctl_tree(dev);
child = SYSCTL_CHILDREN(tree);
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed",
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
+ vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
+ }
+}
+
+static void
+vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *child, struct vtnet_softc *sc)
+{
+ struct vtnet_statistics *stats;
+
+ stats = &sc->vtnet_stats;
+
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
CTLFLAG_RD, &stats->mbuf_alloc_failed,
"Mbuf cluster allocation failures");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
CTLFLAG_RD, &stats->rx_frame_too_large,
"Received frame larger than the mbuf chain");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
CTLFLAG_RD, &stats->rx_enq_replacement_failed,
"Enqueuing the replacement receive mbuf failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
CTLFLAG_RD, &stats->rx_mergeable_failed,
"Mergeable buffers receive failures");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
"Received checksum offloaded buffer with unsupported "
"Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start",
- CTLFLAG_RD, &stats->rx_csum_bad_start,
- "Received checksum offloaded buffer with incorrect start offset");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
"Received checksum offloaded buffer with incorrect IP protocol");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
CTLFLAG_RD, &stats->rx_csum_bad_offset,
"Received checksum offloaded buffer with incorrect offset");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
+ CTLFLAG_RD, &stats->rx_csum_bad_proto,
+ "Received checksum offloaded buffer with incorrect protocol");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
CTLFLAG_RD, &stats->rx_csum_failed,
"Received buffer checksum offload failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_offloaded",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
CTLFLAG_RD, &stats->rx_csum_offloaded,
"Received buffer checksum offload succeeded");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
CTLFLAG_RD, &stats->rx_task_rescheduled,
"Times the receive interrupt task rescheduled itself");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded",
- CTLFLAG_RD, &stats->tx_csum_offloaded,
- "Offloaded checksum of transmitted buffer");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded",
- CTLFLAG_RD, &stats->tx_tso_offloaded,
- "Segmentation offload of transmitted buffer");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
"Aborted transmit of checksum offloaded buffer with unknown "
"Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
"Aborted transmit of TSO buffer with unknown Ethernet type");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled",
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
+ CTLFLAG_RD, &stats->tx_tso_not_tcp,
+ "Aborted transmit of TSO buffer with non TCP protocol");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
+ CTLFLAG_RD, &stats->tx_csum_offloaded,
+ "Offloaded checksum of transmitted buffer");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
+ CTLFLAG_RD, &stats->tx_tso_offloaded,
+ "Segmentation offload of transmitted buffer");
+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
CTLFLAG_RD, &stats->tx_task_rescheduled,
"Times the transmit interrupt task rescheduled itself");
}
+static void
+vtnet_setup_sysctl(struct vtnet_softc *sc)
+{
+ device_t dev;
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *tree;
+ struct sysctl_oid_list *child;
+
+ dev = sc->vtnet_dev;
+ ctx = device_get_sysctl_ctx(dev);
+ tree = device_get_sysctl_tree(dev);
+ child = SYSCTL_CHILDREN(tree);
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
+ CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
+ "Maximum number of supported virtqueue pairs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
+ CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
+ "Number of active virtqueue pairs");
+
+ vtnet_setup_stat_sysctl(ctx, child, sc);
+}
+
static int
-vtnet_enable_rx_intr(struct vtnet_softc *sc)
+vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{
- return (virtqueue_enable_intr(sc->vtnet_rx_vq));
+ return (virtqueue_enable_intr(rxq->vtnrx_vq));
}
static void
-vtnet_disable_rx_intr(struct vtnet_softc *sc)
+vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{
- virtqueue_disable_intr(sc->vtnet_rx_vq);
+ virtqueue_disable_intr(rxq->vtnrx_vq);
}
static int
-vtnet_enable_tx_intr(struct vtnet_softc *sc)
+vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
-#ifdef VTNET_TX_INTR_MODERATION
- return (0);
-#else
- return (virtqueue_enable_intr(sc->vtnet_tx_vq));
-#endif
+ return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+}
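
Rather than leaving transmit interrupts enabled, the driver now postpones them: where VIRTIO_RING_F_EVENT_IDX is available, virtqueue_postpone_intr() pushes the used-event index ahead so the host interrupts once per batch, replacing the old VTNET_TX_INTR_MODERATION compile-time behavior removed above. The event-index test itself, as given in the virtio specification, shown as a sketch:

#include <stdint.h>
#include <stdio.h>

/*
 * Event-index test from the virtio specification: interrupt only when
 * the new used index crosses event_idx. The unsigned 16-bit wrap-around
 * arithmetic keeps the comparison correct across index overflow.
 */
static int
vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
{

	return ((uint16_t)(new_idx - event_idx - 1) <
	    (uint16_t)(new_idx - old_idx));
}

int
main(void)
{

	/* Entries 6..10 completed; guest asked for an event after 8. */
	printf("%d\n", vring_need_event(8, 10, 5));	/* 1: interrupt */
	printf("%d\n", vring_need_event(12, 10, 5));	/* 0: postponed */
	return (0);
}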
+
+static void
+vtnet_txq_disable_intr(struct vtnet_txq *txq)
+{
+
+ virtqueue_disable_intr(txq->vtntx_vq);
+}
+
+static void
+vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
}
static void
-vtnet_disable_tx_intr(struct vtnet_softc *sc)
+vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
+}
+
+static void
+vtnet_enable_interrupts(struct vtnet_softc *sc)
+{
+
+ vtnet_enable_rx_interrupts(sc);
+ vtnet_enable_tx_interrupts(sc);
+}
+
+static void
+vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
+}
+
+static void
+vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
+ vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
+}
+
+static void
+vtnet_disable_interrupts(struct vtnet_softc *sc)
+{
+
+ vtnet_disable_rx_interrupts(sc);
+ vtnet_disable_tx_interrupts(sc);
+}
+
+static int
+vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
+{
+ char path[64];
+
+ snprintf(path, sizeof(path),
+ "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
+ TUNABLE_INT_FETCH(path, &def);
- virtqueue_disable_intr(sc->vtnet_tx_vq);
+ return (def);
}
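
This helper backs the per-device hw.vtnet.X.* tunables added to the manual page: the per-unit path is consulted first, and def (typically the global tunable's value) passes through untouched if the loader never set the per-unit knob. A userland sketch of the same fallback pattern, with getenv(3) standing in for TUNABLE_INT_FETCH():

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical stand-in: getenv(3) plays the role of the loader's
 * kernel environment consulted by TUNABLE_INT_FETCH().
 */
static int
tunable_int(int unit, const char *knob, int def)
{
	char path[64];
	const char *val;

	snprintf(path, sizeof(path), "hw.vtnet.%d.%s", unit, knob);
	if ((val = getenv(path)) != NULL)
		def = atoi(val);
	return (def);
}

int
main(void)
{

	/* Falls back to the supplied default unless hw.vtnet.0.mq_disable
	 * is present in the environment. */
	printf("mq_disable = %d\n", tunable_int(0, "mq_disable", 0));
	return (0);
}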
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index d870436..5921103 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -29,83 +29,165 @@
#ifndef _IF_VTNETVAR_H
#define _IF_VTNETVAR_H
+struct vtnet_softc;
+
struct vtnet_statistics {
- unsigned long mbuf_alloc_failed;
-
- unsigned long rx_frame_too_large;
- unsigned long rx_enq_replacement_failed;
- unsigned long rx_mergeable_failed;
- unsigned long rx_csum_bad_ethtype;
- unsigned long rx_csum_bad_start;
- unsigned long rx_csum_bad_ipproto;
- unsigned long rx_csum_bad_offset;
- unsigned long rx_csum_failed;
- unsigned long rx_csum_offloaded;
- unsigned long rx_task_rescheduled;
-
- unsigned long tx_csum_offloaded;
- unsigned long tx_tso_offloaded;
- unsigned long tx_csum_bad_ethtype;
- unsigned long tx_tso_bad_ethtype;
- unsigned long tx_task_rescheduled;
+ uint64_t mbuf_alloc_failed;
+
+ uint64_t rx_frame_too_large;
+ uint64_t rx_enq_replacement_failed;
+ uint64_t rx_mergeable_failed;
+ uint64_t rx_csum_bad_ethtype;
+ uint64_t rx_csum_bad_ipproto;
+ uint64_t rx_csum_bad_offset;
+ uint64_t rx_csum_bad_proto;
+ uint64_t tx_csum_bad_ethtype;
+ uint64_t tx_tso_bad_ethtype;
+ uint64_t tx_tso_not_tcp;
+
+ /*
+ * These are accumulated from each Rx/Tx queue.
+ */
+ uint64_t rx_csum_failed;
+ uint64_t rx_csum_offloaded;
+ uint64_t rx_task_rescheduled;
+ uint64_t tx_csum_offloaded;
+ uint64_t tx_tso_offloaded;
+ uint64_t tx_task_rescheduled;
+};
+
+struct vtnet_rxq_stats {
+ uint64_t vrxs_ipackets; /* if_ipackets */
+ uint64_t vrxs_ibytes; /* if_ibytes */
+ uint64_t vrxs_iqdrops; /* if_iqdrops */
+ uint64_t vrxs_ierrors; /* if_ierrors */
+ uint64_t vrxs_csum;
+ uint64_t vrxs_csum_failed;
+ uint64_t vrxs_rescheduled;
};
+struct vtnet_rxq {
+ struct mtx vtnrx_mtx;
+ struct vtnet_softc *vtnrx_sc;
+ struct virtqueue *vtnrx_vq;
+ int vtnrx_id;
+ int vtnrx_process_limit;
+ struct vtnet_rxq_stats vtnrx_stats;
+ struct taskqueue *vtnrx_tq;
+ struct task vtnrx_intrtask;
+ char vtnrx_name[16];
+} __aligned(CACHE_LINE_SIZE);
+
+#define VTNET_RXQ_LOCK(_rxq) mtx_lock(&(_rxq)->vtnrx_mtx)
+#define VTNET_RXQ_UNLOCK(_rxq) mtx_unlock(&(_rxq)->vtnrx_mtx)
+#define VTNET_RXQ_LOCK_ASSERT(_rxq) \
+ mtx_assert(&(_rxq)->vtnrx_mtx, MA_OWNED)
+#define VTNET_RXQ_LOCK_ASSERT_NOTOWNED(_rxq) \
+ mtx_assert(&(_rxq)->vtnrx_mtx, MA_NOTOWNED)
+
+struct vtnet_txq_stats {
+ uint64_t vtxs_opackets; /* if_opackets */
+ uint64_t vtxs_obytes; /* if_obytes */
+ uint64_t vtxs_omcasts; /* if_omcasts */
+ uint64_t vtxs_csum;
+ uint64_t vtxs_tso;
+ uint64_t vtxs_collapsed;
+ uint64_t vtxs_rescheduled;
+};
+
+struct vtnet_txq {
+ struct mtx vtntx_mtx;
+ struct vtnet_softc *vtntx_sc;
+ struct virtqueue *vtntx_vq;
+#ifndef VTNET_LEGACY_TX
+ struct buf_ring *vtntx_br;
+#endif
+ int vtntx_id;
+ int vtntx_watchdog;
+ struct vtnet_txq_stats vtntx_stats;
+ struct taskqueue *vtntx_tq;
+ struct task vtntx_intrtask;
+#ifndef VTNET_LEGACY_TX
+ struct task vtntx_defrtask;
+#endif
+ char vtntx_name[16];
+} __aligned(CACHE_LINE_SIZE);
+
+#define VTNET_TXQ_LOCK(_txq) mtx_lock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_TRYLOCK(_txq) mtx_trylock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_UNLOCK(_txq) mtx_unlock(&(_txq)->vtntx_mtx)
+#define VTNET_TXQ_LOCK_ASSERT(_txq) \
+ mtx_assert(&(_txq)->vtntx_mtx, MA_OWNED)
+#define VTNET_TXQ_LOCK_ASSERT_NOTOWNED(_txq) \
+ mtx_assert(&(_txq)->vtntx_mtx, MA_NOTOWNED)
+
struct vtnet_softc {
device_t vtnet_dev;
struct ifnet *vtnet_ifp;
- struct mtx vtnet_mtx;
+ struct vtnet_rxq *vtnet_rxqs;
+ struct vtnet_txq *vtnet_txqs;
uint32_t vtnet_flags;
-#define VTNET_FLAG_LINK 0x0001
-#define VTNET_FLAG_SUSPENDED 0x0002
+#define VTNET_FLAG_SUSPENDED 0x0001
+#define VTNET_FLAG_MAC 0x0002
#define VTNET_FLAG_CTRL_VQ 0x0004
#define VTNET_FLAG_CTRL_RX 0x0008
-#define VTNET_FLAG_VLAN_FILTER 0x0010
-#define VTNET_FLAG_TSO_ECN 0x0020
-#define VTNET_FLAG_MRG_RXBUFS 0x0040
-#define VTNET_FLAG_LRO_NOMRG 0x0080
-
- struct virtqueue *vtnet_rx_vq;
- struct virtqueue *vtnet_tx_vq;
- struct virtqueue *vtnet_ctrl_vq;
+#define VTNET_FLAG_CTRL_MAC 0x0010
+#define VTNET_FLAG_VLAN_FILTER 0x0020
+#define VTNET_FLAG_TSO_ECN 0x0040
+#define VTNET_FLAG_MRG_RXBUFS 0x0080
+#define VTNET_FLAG_LRO_NOMRG 0x0100
+#define VTNET_FLAG_MULTIQ 0x0200
+ int vtnet_link_active;
int vtnet_hdr_size;
- int vtnet_tx_size;
- int vtnet_rx_size;
int vtnet_rx_process_limit;
- int vtnet_rx_mbuf_size;
- int vtnet_rx_mbuf_count;
+ int vtnet_rx_nmbufs;
+ int vtnet_rx_clsize;
+ int vtnet_rx_new_clsize;
int vtnet_if_flags;
- int vtnet_watchdog_timer;
- uint64_t vtnet_features;
+ int vtnet_act_vq_pairs;
+ int vtnet_max_vq_pairs;
- struct vtnet_statistics vtnet_stats;
+ struct virtqueue *vtnet_ctrl_vq;
+ struct vtnet_mac_filter *vtnet_mac_filter;
+ uint32_t *vtnet_vlan_filter;
+ uint64_t vtnet_features;
+ struct vtnet_statistics vtnet_stats;
struct callout vtnet_tick_ch;
-
+ struct ifmedia vtnet_media;
eventhandler_tag vtnet_vlan_attach;
eventhandler_tag vtnet_vlan_detach;
- struct ifmedia vtnet_media;
- /*
- * Fake media type; the host does not provide us with
- * any real media information.
- */
-#define VTNET_MEDIATYPE (IFM_ETHER | IFM_1000_T | IFM_FDX)
+ struct mtx vtnet_mtx;
+ char vtnet_mtx_name[16];
char vtnet_hwaddr[ETHER_ADDR_LEN];
+};
- struct vtnet_mac_filter *vtnet_mac_filter;
- /*
- * During reset, the host's VLAN filtering table is lost. The
- * array below is used to restore all the VLANs configured on
- * this interface after a reset.
- */
-#define VTNET_VLAN_SHADOW_SIZE (4096 / 32)
- int vtnet_nvlans;
- uint32_t vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE];
+/*
+ * Upper bound on the number of queue pairs the driver will
+ * configure automatically.
+ */
+#define VTNET_MAX_QUEUE_PAIRS 8
- char vtnet_mtx_name[16];
-};
+/*
+ * Additional completed entries can appear in a virtqueue before
+ * interrupts can be reenabled, so the handler retries a bounded number
+ * of times. This is the retry count before the remaining completed
+ * entries are deferred to the taskqueue.
+ */
+#define VTNET_INTR_DISABLE_RETRIES 4
+
+/*
+ * Fake the media type. The host does not provide us with any real media
+ * information.
+ */
+#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX)
+
+/*
+ * Number of words to allocate for the VLAN shadow table. There is one
+ * bit for each VLAN.
+ */
+#define VTNET_VLAN_FILTER_NWORDS (4096 / 32)
/*
* When mergeable buffers are not negotiated, the vtnet_rx_header structure
@@ -161,8 +243,12 @@ struct vtnet_mac_filter {
*/
CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
-#define VTNET_WATCHDOG_TIMEOUT 5
+#define VTNET_TX_TIMEOUT 5
#define VTNET_CSUM_OFFLOAD (CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define VTNET_CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+
+#define VTNET_CSUM_ALL_OFFLOAD \
+ (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO)
/* Features desired/implemented by this driver. */
#define VTNET_FEATURES \
@@ -170,8 +256,10 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
VIRTIO_NET_F_STATUS | \
VIRTIO_NET_F_CTRL_VQ | \
VIRTIO_NET_F_CTRL_RX | \
+ VIRTIO_NET_F_CTRL_MAC_ADDR | \
VIRTIO_NET_F_CTRL_VLAN | \
VIRTIO_NET_F_CSUM | \
+ VIRTIO_NET_F_GSO | \
VIRTIO_NET_F_HOST_TSO4 | \
VIRTIO_NET_F_HOST_TSO6 | \
VIRTIO_NET_F_HOST_ECN | \
@@ -180,9 +268,18 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE);
VIRTIO_NET_F_GUEST_TSO6 | \
VIRTIO_NET_F_GUEST_ECN | \
VIRTIO_NET_F_MRG_RXBUF | \
+ VIRTIO_NET_F_MQ | \
+ VIRTIO_RING_F_EVENT_IDX | \
VIRTIO_RING_F_INDIRECT_DESC)
/*
+ * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send frames
+ * larger than 1514 bytes to the host.
+ */
+#define VTNET_TSO_FEATURES (VIRTIO_NET_F_GSO | VIRTIO_NET_F_HOST_TSO4 | \
+ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN)
+
+/*
* The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
* frames larger than 1514 bytes. We do not yet support software LRO
* via tcp_lro_rx().
@@ -209,27 +306,34 @@ CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
/*
+ * Number of slots in the Tx bufrings. This value matches the size
+ * used by most other multiqueue drivers.
+ */
+#define VTNET_DEFAULT_BUFRING_SIZE 4096
+
+/*
* Determine how many mbufs are in each receive buffer. For LRO without
* mergeable descriptors, we must allocate an mbuf chain large enough to
* hold both the vtnet_rx_header and the maximum receivable data.
*/
-#define VTNET_NEEDED_RX_MBUFS(_sc) \
+#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \
((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 : \
howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \
- (_sc)->vtnet_rx_mbuf_size)
+ (_clsize))
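
Worked numbers: with LRO_NOMRG negotiated, the mbuf chain must cover the receive header plus the largest receivable frame, so howmany() rounds that total up to whole clusters. A quick sketch of the arithmetic, assuming a 24-byte header and 65550 for VTNET_MAX_RX_SIZE (both values assumed for illustration):

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	int hdr = 24;		/* assumed sizeof(struct vtnet_rx_header) */
	int maxrx = 65550;	/* assumed VTNET_MAX_RX_SIZE */
	int clsize;

	for (clsize = 2048; clsize <= 16384; clsize *= 2)
		printf("%5d-byte clusters -> %2d mbufs per rx buffer\n",
		    clsize, howmany(hdr + maxrx, clsize));
	return (0);
}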
-#define VTNET_MTX(_sc) &(_sc)->vtnet_mtx
-#define VTNET_LOCK(_sc) mtx_lock(VTNET_MTX((_sc)))
-#define VTNET_UNLOCK(_sc) mtx_unlock(VTNET_MTX((_sc)))
-#define VTNET_LOCK_DESTROY(_sc) mtx_destroy(VTNET_MTX((_sc)))
-#define VTNET_LOCK_ASSERT(_sc) mtx_assert(VTNET_MTX((_sc)), MA_OWNED)
-#define VTNET_LOCK_ASSERT_NOTOWNED(_sc) \
- mtx_assert(VTNET_MTX((_sc)), MA_NOTOWNED)
+#define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx
+#define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_UNLOCK(_sc) mtx_unlock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_DESTROY(_sc) mtx_destroy(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_ASSERT(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_OWNED)
+#define VTNET_CORE_LOCK_ASSERT_NOTOWNED(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_NOTOWNED)
-#define VTNET_LOCK_INIT(_sc) do { \
+#define VTNET_CORE_LOCK_INIT(_sc) do { \
snprintf((_sc)->vtnet_mtx_name, sizeof((_sc)->vtnet_mtx_name), \
"%s", device_get_nameunit((_sc)->vtnet_dev)); \
- mtx_init(VTNET_MTX((_sc)), (_sc)->vtnet_mtx_name, \
+ mtx_init(VTNET_CORE_MTX((_sc)), (_sc)->vtnet_mtx_name, \
"VTNET Core Lock", MTX_DEF); \
} while (0)
diff --git a/sys/modules/virtio/network/Makefile b/sys/modules/virtio/network/Makefile
index 8463309c..f124d99 100644
--- a/sys/modules/virtio/network/Makefile
+++ b/sys/modules/virtio/network/Makefile
@@ -23,14 +23,29 @@
# SUCH DAMAGE.
#
+.include <bsd.own.mk>
+
.PATH: ${.CURDIR}/../../../dev/virtio/network
KMOD= if_vtnet
SRCS= if_vtnet.c
SRCS+= virtio_bus_if.h virtio_if.h
SRCS+= bus_if.h device_if.h
+SRCS+= opt_inet.h opt_inet6.h
MFILES= kern/bus_if.m kern/device_if.m \
dev/virtio/virtio_bus_if.m dev/virtio/virtio_if.m
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
.include <bsd.kmod.mk>