author    bryanv <bryanv@FreeBSD.org>  2014-08-22 15:12:20 +0000
committer bryanv <bryanv@FreeBSD.org>  2014-08-22 15:12:20 +0000
commit    df42e806cb6116755002afbea27aeee87bea245d (patch)
tree      54424173ea41711fa649ed8c29c83daab308caf7 /sys/dev
parent    3d5a7426d58b33e4f0df098287e5847aa755b457 (diff)
MFC r268481:
Rework when the Tx queue completion interrupt is enabled

The Tx interrupt is now kept disabled in the common case, and only
enabled when the number of free descriptors in the queue falls below a
threshold. Transmitted frames are cleared from the VQ before subsequent
transmits, or in the watchdog timer. This was a very big performance
improvement for an experimental Netmap bhyve backend.
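In outline: after enqueuing frames the driver kicks the host, and only
re-arms the completion interrupt once the free descriptor count drops to
a threshold (a quarter of the queue size, e.g. 64 entries for a
256-entry VQ, floored at vtnet_tx_nsegs when indirect descriptors are
unavailable). Because completions can race with arming the interrupt,
the notify path re-drains the queue and asks the caller to retry the
send loop if the drain freed enough room. The minimal userspace model
below restates that logic; every name in it (tx_ring, ring_reclaim(),
tx_notify()) is invented for illustration and is not the vtnet or
virtqueue API -- the real implementation is vtnet_txq_notify(),
vtnet_txq_below_threshold(), and vtnet_txq_enable_intr() in the diff
that follows.

#include <stdbool.h>
#include <stdio.h>

struct tx_ring {
	int	size;		/* total descriptors in the ring */
	int	free;		/* descriptors currently free */
	int	intr_thresh;	/* arm the interrupt at or below this */
	bool	intr_on;	/* completion interrupt armed? */
	int	completed;	/* completions not yet reclaimed */
};

/* Reclaim completed descriptors; returns how many were freed. */
static int
ring_reclaim(struct tx_ring *r)
{
	int n;

	n = r->completed;
	r->completed = 0;
	r->free += n;
	return (n);
}

static bool
ring_below_thresh(struct tx_ring *r)
{
	return (r->free <= r->intr_thresh);
}

/*
 * Post-enqueue notify: kick the host, then arm the completion
 * interrupt only when free descriptors have fallen to the threshold.
 * Returns true when a racing completion freed enough descriptors that
 * the caller should transmit more (the analogue of the "goto again"
 * retry in vtnet_start_locked()).
 */
static bool
tx_notify(struct tx_ring *r)
{
	/* The real driver kicks the host here via virtqueue_notify(). */

	if (!ring_below_thresh(r)) {
		r->intr_on = false;	/* plenty of room: stay silent */
		return (false);
	}
	r->intr_on = true;		/* running low: arm the interrupt */

	/*
	 * Completions may have raced with arming the interrupt; if a
	 * drain lifts the ring back above the threshold, disarm and
	 * tell the caller to retry its send loop.
	 */
	if (ring_reclaim(r) > 0 && !ring_below_thresh(r)) {
		r->intr_on = false;
		return (true);
	}
	return (false);
}

int
main(void)
{
	struct tx_ring r = { .size = 256, .free = 32, .intr_thresh = 64,
	    .intr_on = false, .completed = 100 };
	bool retry;

	/* 32 free <= 64, but reclaiming 100 lifts us to 132: retry. */
	retry = tx_notify(&r);
	printf("retry=%d intr_on=%d free=%d\n", retry, (int)r.intr_on,
	    r.free);
	return (0);
}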
Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c     211
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h    9
2 files changed, 155 insertions(+), 65 deletions(-)
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 17630b9..6d1f13f 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -126,6 +126,8 @@ static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
static void vtnet_rxq_tq_intr(void *, int);
+static int vtnet_txq_below_threshold(struct vtnet_txq *);
+static int vtnet_txq_notify(struct vtnet_txq *);
static void vtnet_txq_free_mbufs(struct vtnet_txq *);
static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
int *, int *, int *);
@@ -147,7 +149,7 @@ static void vtnet_txq_tq_deferred(void *, int);
#endif
static void vtnet_txq_start(struct vtnet_txq *);
static void vtnet_txq_tq_intr(void *, int);
-static void vtnet_txq_eof(struct vtnet_txq *);
+static int vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
static void vtnet_tx_start_all(struct vtnet_softc *);
@@ -204,6 +206,8 @@ static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void vtnet_get_hwaddr(struct vtnet_softc *);
static void vtnet_set_hwaddr(struct vtnet_softc *);
static void vtnet_vlan_tag_remove(struct mbuf *);
+static void vtnet_set_rx_process_limit(struct vtnet_softc *);
+static void vtnet_set_tx_intr_threshold(struct vtnet_softc *);
static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
struct sysctl_oid_list *, struct vtnet_rxq *);
@@ -239,19 +243,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
-/*
- * Reducing the number of transmit completed interrupts can improve
- * performance. To do so, the define below keeps the Tx vq interrupt
- * disabled and adds calls to vtnet_txeof() in the start and watchdog
- * paths. The price to pay for this is the m_free'ing of transmitted
- * mbufs may be delayed until the watchdog fires.
- *
- * BMV: Reintroduce this later as a run-time option, if it makes
- * sense after the EVENT_IDX feature is supported.
- *
- * #define VTNET_TX_INTR_MODERATION
- */
-
static uma_zone_t vtnet_tx_header_zone;
static struct virtio_feature_desc vtnet_feature_desc[] = {
@@ -901,7 +892,6 @@ vtnet_setup_interface(struct vtnet_softc *sc)
{
device_t dev;
struct ifnet *ifp;
- int limit;
dev = sc->vtnet_dev;
@@ -1000,11 +990,8 @@ vtnet_setup_interface(struct vtnet_softc *sc)
vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
}
- limit = vtnet_tunable_int(sc, "rx_process_limit",
- vtnet_rx_process_limit);
- if (limit < 0)
- limit = INT_MAX;
- sc->vtnet_rx_process_limit = limit;
+ vtnet_set_rx_process_limit(sc);
+ vtnet_set_tx_intr_threshold(sc);
return (0);
}
@@ -1895,6 +1882,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pending)
VTNET_RXQ_UNLOCK(rxq);
}
+static int
+vtnet_txq_below_threshold(struct vtnet_txq *txq)
+{
+ struct vtnet_softc *sc;
+ struct virtqueue *vq;
+
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
+
+ return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
+}
+
+static int
+vtnet_txq_notify(struct vtnet_txq *txq)
+{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ virtqueue_notify(vq);
+
+ if (vtnet_txq_enable_intr(txq) == 0)
+ return (0);
+
+ /*
+ * Drain frames that were completed since last checked. If this
+ * causes the queue to go above the threshold, the caller should
+ * continue transmitting.
+ */
+ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
+ virtqueue_disable_intr(vq);
+ return (1);
+ }
+
+ return (0);
+}
+
static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
@@ -2169,11 +2194,11 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m0;
- int enq;
+ int tries, enq;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
- enq = 0;
+ tries = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2183,6 +2208,9 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
if (virtqueue_full(vq))
break;
@@ -2201,9 +2229,12 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
ETHER_BPF_MTAP(ifp, m0);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
}
@@ -2230,13 +2261,13 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
struct virtqueue *vq;
struct buf_ring *br;
struct ifnet *ifp;
- int enq, error;
+ int enq, tries, error;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
br = txq->vtntx_br;
ifp = sc->vtnet_ifp;
- enq = 0;
+ tries = 0;
error = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2256,14 +2287,16 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while ((m = drbr_peek(ifp, br)) != NULL) {
if (virtqueue_full(vq)) {
drbr_putback(ifp, br, m);
break;
}
- error = vtnet_txq_encap(txq, &m);
- if (error) {
+ if (vtnet_txq_encap(txq, &m) != 0) {
if (m != NULL)
drbr_putback(ifp, br, m);
else
@@ -2276,9 +2309,12 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
ETHER_BPF_MTAP(ifp, m);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
return (0);
@@ -2366,30 +2402,26 @@ vtnet_txq_tq_intr(void *xtxq, int pending)
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- }
-
VTNET_TXQ_UNLOCK(txq);
}
-static void
+static int
vtnet_txq_eof(struct vtnet_txq *txq)
{
struct virtqueue *vq;
struct vtnet_tx_header *txhdr;
struct mbuf *m;
+ int deq;
vq = txq->vtntx_vq;
+ deq = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
m = txhdr->vth_mbuf;
+ deq++;
txq->vtntx_stats.vtxs_opackets++;
txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
@@ -2402,6 +2434,8 @@ vtnet_txq_eof(struct vtnet_txq *txq)
if (virtqueue_empty(vq))
txq->vtntx_watchdog = 0;
+
+ return (deq);
}
static void
@@ -2410,12 +2444,10 @@ vtnet_tx_vq_intr(void *xtxq)
struct vtnet_softc *sc;
struct vtnet_txq *txq;
struct ifnet *ifp;
- int tries;
txq = xtxq;
sc = txq->vtntx_sc;
ifp = sc->vtnet_ifp;
- tries = 0;
if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
/*
@@ -2430,30 +2462,15 @@ vtnet_tx_vq_intr(void *xtxq)
VTNET_TXQ_LOCK(txq);
-again:
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
VTNET_TXQ_UNLOCK(txq);
return;
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- /*
- * This is an occasional race, so retry a few times
- * before scheduling the taskqueue.
- */
- if (tries++ < VTNET_INTR_DISABLE_RETRIES)
- goto again;
-
- VTNET_TXQ_UNLOCK(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- } else
- VTNET_TXQ_UNLOCK(txq);
+ VTNET_TXQ_UNLOCK(txq);
}
static void
@@ -2500,21 +2517,31 @@ vtnet_qflush(struct ifnet *ifp)
static int
vtnet_watchdog(struct vtnet_txq *txq)
{
- struct vtnet_softc *sc;
+ struct ifnet *ifp;
- sc = txq->vtntx_sc;
+ ifp = txq->vtntx_sc->vtnet_ifp;
VTNET_TXQ_LOCK(txq);
- if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
- vtnet_txq_eof(txq);
+ if (txq->vtntx_watchdog == 1) {
+ /*
+ * Only drain completed frames if the watchdog is about to
+ * expire. If any frames were drained, there may be enough
+ * free descriptors now available to transmit queued frames.
+ * In that case, the timer will immediately be decremented
+ * below, but the timeout is generous enough that this should
+ * not be a problem.
+ */
+ if (vtnet_txq_eof(txq) != 0)
+ vtnet_txq_start(txq);
+ }
+
if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
VTNET_TXQ_UNLOCK(txq);
return (0);
}
VTNET_TXQ_UNLOCK(txq);
- if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
- txq->vtntx_id);
+ if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
return (1);
}
@@ -3564,6 +3591,50 @@ vtnet_vlan_tag_remove(struct mbuf *m)
}
static void
+vtnet_set_rx_process_limit(struct vtnet_softc *sc)
+{
+ int limit;
+
+ limit = vtnet_tunable_int(sc, "rx_process_limit",
+ vtnet_rx_process_limit);
+ if (limit < 0)
+ limit = INT_MAX;
+ sc->vtnet_rx_process_limit = limit;
+}
+
+static void
+vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int size, thresh;
+
+ dev = sc->vtnet_dev;
+ size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
+
+ /*
+ * The Tx interrupt is disabled until the queue free count falls
+ * below our threshold. Completed frames are drained from the Tx
+ * virtqueue before transmitting new frames and in the watchdog
+ * callout, so the frequency of Tx interrupts is greatly reduced,
+ * at the cost of not freeing mbufs as quickly as they otherwise
+ * would be.
+ *
+ * N.B. We assume all the Tx queues are the same size.
+ */
+ thresh = size / 4;
+
+ /*
+ * Without indirect descriptors, leave enough room for the
+ * largest number of segments we handle.
+ */
+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 &&
+ thresh < sc->vtnet_tx_nsegs)
+ thresh = sc->vtnet_tx_nsegs;
+
+ sc->vtnet_tx_intr_thresh = thresh;
+}
+
+static void
vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
{
@@ -3758,8 +3829,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ if (vtnet_txq_below_threshold(txq) != 0)
+ return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
- return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+ /*
+ * The free count is above our threshold. Keep the Tx interrupt
+ * disabled until the queue is fuller.
+ */
+ return (0);
}
static void
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index 23c31f4..7249a3e 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -149,6 +149,7 @@ struct vtnet_softc {
int vtnet_rx_nmbufs;
int vtnet_rx_clsize;
int vtnet_rx_new_clsize;
+ int vtnet_tx_intr_thresh;
int vtnet_tx_nsegs;
int vtnet_if_flags;
int vtnet_act_vq_pairs;
@@ -183,6 +184,14 @@ struct vtnet_softc {
#define VTNET_INTR_DISABLE_RETRIES 4
/*
+ * Similarly, additional completed entries can appear in a virtqueue
+ * between when it was last checked and before notifying the host.
+ * This is the number of times to retry before scheduling the
+ * taskqueue to process the queue.
+ */
+#define VTNET_NOTIFY_RETRIES 4
+
+/*
* Fake the media type. The host does not provide us with any real media
* information.
*/