-rw-r--r--  sys/dev/virtio/network/if_vtnet.c     | 211
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h  |   9
2 files changed, 155 insertions, 65 deletions
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 17630b9..6d1f13f 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -126,6 +126,8 @@ static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
static void vtnet_rxq_tq_intr(void *, int);
+static int vtnet_txq_below_threshold(struct vtnet_txq *);
+static int vtnet_txq_notify(struct vtnet_txq *);
static void vtnet_txq_free_mbufs(struct vtnet_txq *);
static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
int *, int *, int *);
@@ -147,7 +149,7 @@ static void vtnet_txq_tq_deferred(void *, int);
#endif
static void vtnet_txq_start(struct vtnet_txq *);
static void vtnet_txq_tq_intr(void *, int);
-static void vtnet_txq_eof(struct vtnet_txq *);
+static int vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
static void vtnet_tx_start_all(struct vtnet_softc *);
@@ -204,6 +206,8 @@ static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void vtnet_get_hwaddr(struct vtnet_softc *);
static void vtnet_set_hwaddr(struct vtnet_softc *);
static void vtnet_vlan_tag_remove(struct mbuf *);
+static void vtnet_set_rx_process_limit(struct vtnet_softc *);
+static void vtnet_set_tx_intr_threshold(struct vtnet_softc *);
static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
struct sysctl_oid_list *, struct vtnet_rxq *);
@@ -239,19 +243,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
-/*
- * Reducing the number of transmit completed interrupts can improve
- * performance. To do so, the define below keeps the Tx vq interrupt
- * disabled and adds calls to vtnet_txeof() in the start and watchdog
- * paths. The price to pay for this is the m_free'ing of transmitted
- * mbufs may be delayed until the watchdog fires.
- *
- * BMV: Reintroduce this later as a run-time option, if it makes
- * sense after the EVENT_IDX feature is supported.
- *
- * #define VTNET_TX_INTR_MODERATION
- */
-
static uma_zone_t vtnet_tx_header_zone;
static struct virtio_feature_desc vtnet_feature_desc[] = {
@@ -901,7 +892,6 @@ vtnet_setup_interface(struct vtnet_softc *sc)
{
device_t dev;
struct ifnet *ifp;
- int limit;
dev = sc->vtnet_dev;
@@ -1000,11 +990,8 @@ vtnet_setup_interface(struct vtnet_softc *sc)
vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
}
- limit = vtnet_tunable_int(sc, "rx_process_limit",
- vtnet_rx_process_limit);
- if (limit < 0)
- limit = INT_MAX;
- sc->vtnet_rx_process_limit = limit;
+ vtnet_set_rx_process_limit(sc);
+ vtnet_set_tx_intr_threshold(sc);
return (0);
}
@@ -1895,6 +1882,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pending)
VTNET_RXQ_UNLOCK(rxq);
}
+static int
+vtnet_txq_below_threshold(struct vtnet_txq *txq)
+{
+ struct vtnet_softc *sc;
+ struct virtqueue *vq;
+
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
+
+ return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
+}
+
+static int
+vtnet_txq_notify(struct vtnet_txq *txq)
+{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ virtqueue_notify(vq);
+
+ if (vtnet_txq_enable_intr(txq) == 0)
+ return (0);
+
+ /*
+ * Drain frames that were completed since last checked. If this
+ * causes the queue to go above the threshold, the caller should
+ * continue transmitting.
+ */
+ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
+ virtqueue_disable_intr(vq);
+ return (1);
+ }
+
+ return (0);
+}
+
static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
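
A rough userland sketch of the notify-then-recheck pattern vtnet_txq_notify() implements above; the fake_ring structure and the ring_*() helpers are hypothetical stand-ins, not VirtIO or driver API. The point is the ordering: re-enable the interrupt first, then drain once more, and keep the interrupt off only if that final drain found completions and still left the ring with plenty of free slots.

#include <stdbool.h>
#include <stdio.h>

struct fake_ring {
        int     nfree;          /* free descriptors */
        int     completed;      /* completions not yet drained */
        bool    intr_on;        /* completion interrupt armed? */
};

/* Reclaim completed slots; returns how many were drained. */
static int
ring_drain(struct fake_ring *r)
{
        int n = r->completed;

        r->completed = 0;
        r->nfree += n;
        return (n);
}

static bool
ring_below_threshold(const struct fake_ring *r, int thresh)
{
        return (r->nfree <= thresh);
}

/* Returns nonzero when the caller should keep transmitting. */
static int
ring_notify(struct fake_ring *r, int thresh)
{
        /* The driver kicks the host here with virtqueue_notify(). */
        r->intr_on = true;

        /*
         * Completions may have landed between the last drain and the
         * re-enable above.  If draining them leaves the ring with
         * plenty of free slots, keep the interrupt off and let the
         * caller continue filling the ring.
         */
        if (ring_drain(r) != 0 && !ring_below_threshold(r, thresh)) {
                r->intr_on = false;
                return (1);
        }
        return (0);
}

int
main(void)
{
        struct fake_ring r = { .nfree = 100, .completed = 8, .intr_on = false };

        printf("keep transmitting: %d\n", ring_notify(&r, 256 / 4));
        return (0);
}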
@@ -2169,11 +2194,11 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m0;
- int enq;
+ int tries, enq;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
- enq = 0;
+ tries = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2183,6 +2208,9 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
if (virtqueue_full(vq))
break;
@@ -2201,9 +2229,12 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
ETHER_BPF_MTAP(ifp, m0);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
}
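
The bounded retry in vtnet_start_locked() above (and again in vtnet_txq_mq_start_locked() below) reduces to the following compilable sketch; transmit_some() and notify_says_more_work() are invented for the demo, and only the control flow mirrors the driver.

#include <stdio.h>

#define NOTIFY_RETRIES  4       /* mirrors VTNET_NOTIFY_RETRIES */

/* Pretend the ring only has room for 16 frames per pass. */
static int
transmit_some(int *pending)
{
        int enq = (*pending > 16) ? 16 : *pending;

        *pending -= enq;
        return (enq);
}

/* Pretend completions keep showing up for the first two kicks. */
static int
notify_says_more_work(int pass)
{
        return (pass < 2);
}

static void
start_locked(int pending)
{
        int tries, enq;

        tries = 0;
again:
        enq = transmit_some(&pending);

        if (enq > 0 && notify_says_more_work(tries) != 0) {
                if (tries++ < NOTIFY_RETRIES) {
                        printf("retrying, pass %d\n", tries);
                        goto again;
                }
                printf("giving up, deferring to the taskqueue\n");
        }
}

int
main(void)
{
        start_locked(40);
        return (0);
}

Bounding the retries matters because each pass can race with the host again; after VTNET_NOTIFY_RETRIES losses the remaining work is handed to the taskqueue rather than spinning under the queue lock.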
@@ -2230,13 +2261,13 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
struct virtqueue *vq;
struct buf_ring *br;
struct ifnet *ifp;
- int enq, error;
+ int enq, tries, error;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
br = txq->vtntx_br;
ifp = sc->vtnet_ifp;
- enq = 0;
+ tries = 0;
error = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2256,14 +2287,16 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while ((m = drbr_peek(ifp, br)) != NULL) {
if (virtqueue_full(vq)) {
drbr_putback(ifp, br, m);
break;
}
- error = vtnet_txq_encap(txq, &m);
- if (error) {
+ if (vtnet_txq_encap(txq, &m) != 0) {
if (m != NULL)
drbr_putback(ifp, br, m);
else
@@ -2276,9 +2309,12 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
ETHER_BPF_MTAP(ifp, m);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
return (0);
@@ -2366,30 +2402,26 @@ vtnet_txq_tq_intr(void *xtxq, int pending)
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- }
-
VTNET_TXQ_UNLOCK(txq);
}
-static void
+static int
vtnet_txq_eof(struct vtnet_txq *txq)
{
struct virtqueue *vq;
struct vtnet_tx_header *txhdr;
struct mbuf *m;
+ int deq;
vq = txq->vtntx_vq;
+ deq = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
m = txhdr->vth_mbuf;
+ deq++;
txq->vtntx_stats.vtxs_opackets++;
txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
@@ -2402,6 +2434,8 @@ vtnet_txq_eof(struct vtnet_txq *txq)
if (virtqueue_empty(vq))
txq->vtntx_watchdog = 0;
+
+ return (deq);
}
static void
@@ -2410,12 +2444,10 @@ vtnet_tx_vq_intr(void *xtxq)
struct vtnet_softc *sc;
struct vtnet_txq *txq;
struct ifnet *ifp;
- int tries;
txq = xtxq;
sc = txq->vtntx_sc;
ifp = sc->vtnet_ifp;
- tries = 0;
if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
/*
@@ -2430,30 +2462,15 @@ vtnet_tx_vq_intr(void *xtxq)
VTNET_TXQ_LOCK(txq);
-again:
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
VTNET_TXQ_UNLOCK(txq);
return;
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- /*
- * This is an occasional race, so retry a few times
- * before scheduling the taskqueue.
- */
- if (tries++ < VTNET_INTR_DISABLE_RETRIES)
- goto again;
-
- VTNET_TXQ_UNLOCK(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- } else
- VTNET_TXQ_UNLOCK(txq);
+ VTNET_TXQ_UNLOCK(txq);
}
static void
@@ -2500,21 +2517,31 @@ vtnet_qflush(struct ifnet *ifp)
static int
vtnet_watchdog(struct vtnet_txq *txq)
{
- struct vtnet_softc *sc;
+ struct ifnet *ifp;
- sc = txq->vtntx_sc;
+ ifp = txq->vtntx_sc->vtnet_ifp;
VTNET_TXQ_LOCK(txq);
- if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
- vtnet_txq_eof(txq);
+ if (txq->vtntx_watchdog == 1) {
+ /*
+ * Only drain completed frames if the watchdog is about to
+ * expire. If any frames were drained, there may be enough
+ * free descriptors now available to transmit queued frames.
+ * In that case, the timer will immediately be decremented
+ * below, but the timeout is generous enough that should not
+ * be a problem.
+ */
+ if (vtnet_txq_eof(txq) != 0)
+ vtnet_txq_start(txq);
+ }
+
if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
VTNET_TXQ_UNLOCK(txq);
return (0);
}
VTNET_TXQ_UNLOCK(txq);
- if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
- txq->vtntx_id);
+ if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
return (1);
}
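
Read on its own, the reworked vtnet_watchdog() above amounts to a per-queue countdown. The sketch below models it with invented fake_txq/drain_completed() helpers and a queue whose host never completes anything, so the timer runs out.

#include <stdio.h>

#define TX_TIMEOUT      5       /* mirrors VTNET_TX_TIMEOUT, in ticks */

struct fake_txq {
        int     watchdog;       /* 0 = idle, otherwise ticks remaining */
        int     completed;      /* completions waiting to be drained */
};

static int
drain_completed(struct fake_txq *txq)
{
        int n = txq->completed;

        txq->completed = 0;
        return (n);
}

/* Called once per tick; returns 1 when the queue is declared hung. */
static int
watchdog_tick(struct fake_txq *txq)
{
        if (txq->watchdog == 1) {
                /*
                 * About to expire: drain one last time.  In the driver a
                 * successful drain restarts transmission, which re-arms
                 * the timer via the notify path; model that with a reset.
                 */
                if (drain_completed(txq) != 0)
                        txq->watchdog = TX_TIMEOUT;
        }

        if (txq->watchdog == 0 || --txq->watchdog)
                return (0);

        return (1);
}

int
main(void)
{
        struct fake_txq txq = { .watchdog = TX_TIMEOUT, .completed = 0 };
        int tick = 0;

        while (watchdog_tick(&txq) == 0)
                tick++;
        printf("queue declared hung at tick %d\n", tick + 1);
        return (0);
}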
@@ -3564,6 +3591,50 @@ vtnet_vlan_tag_remove(struct mbuf *m)
}
static void
+vtnet_set_rx_process_limit(struct vtnet_softc *sc)
+{
+ int limit;
+
+ limit = vtnet_tunable_int(sc, "rx_process_limit",
+ vtnet_rx_process_limit);
+ if (limit < 0)
+ limit = INT_MAX;
+ sc->vtnet_rx_process_limit = limit;
+}
+
+static void
+vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int size, thresh;
+
+ dev = sc->vtnet_dev;
+ size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
+
+ /*
+ * The Tx interrupt is disabled until the queue free count falls
+ * below our threshold. Completed frames are drained from the Tx
+ * virtqueue before transmitting new frames and in the watchdog
+ * callout, so the frequency of Tx interrupts is greatly reduced,
+ * at the cost of not freeing mbufs as quickly as they otherwise
+ * would be.
+ *
+ * N.B. We assume all the Tx queues are the same size.
+ */
+ thresh = size / 4;
+
+ /*
+ * Without indirect descriptors, leave enough room for the most
+ * segments we handle.
+ */
+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 &&
+ thresh < sc->vtnet_tx_nsegs)
+ thresh = sc->vtnet_tx_nsegs;
+
+ sc->vtnet_tx_intr_thresh = thresh;
+}
+
+static void
vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
{
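
A worked example of the threshold arithmetic in vtnet_set_tx_intr_threshold() above. The 256- and 64-entry queue sizes and the 35-segment limit are plausible sample values only, not anything read from a device.

#include <stdbool.h>
#include <stdio.h>

/* Same arithmetic as vtnet_set_tx_intr_threshold() above. */
static int
tx_intr_thresh(int vq_size, int tx_nsegs, bool indirect)
{
        int thresh = vq_size / 4;

        /*
         * Without indirect descriptors every segment of a frame occupies
         * its own descriptor, so make sure the interrupt is re-armed
         * while at least one worst-case frame still fits.
         */
        if (!indirect && thresh < tx_nsegs)
                thresh = tx_nsegs;
        return (thresh);
}

int
main(void)
{
        /* 256-entry queue: interrupt re-armed once <= 64 slots are free. */
        printf("%d\n", tx_intr_thresh(256, 35, true));

        /* Small 64-entry queue, no indirect descriptors: bumped to 35. */
        printf("%d\n", tx_intr_thresh(64, 35, false));
        return (0);
}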
@@ -3758,8 +3829,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ if (vtnet_txq_below_threshold(txq) != 0)
+ return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
- return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+ /*
+ * The free count is above our threshold. Keep the Tx interrupt
+ * disabled until the queue is fuller.
+ */
+ return (0);
}
static void
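
Putting the pieces together, the toy loop below shows the effect of the threshold gate in vtnet_txq_enable_intr() above: the completion interrupt stays off while the ring is mostly free and is only armed once the free count drops to one quarter of the ring. RING_SIZE, the burst size and the loop are purely illustrative.

#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE       256
#define THRESH          (RING_SIZE / 4)

int
main(void)
{
        int nfree = RING_SIZE, sent = 0;
        bool intr_armed = false;

        while (nfree > 0) {
                nfree -= 8;             /* enqueue a burst of 8 frames */
                sent += 8;

                /* The vtnet_txq_enable_intr() gate: arm only when low. */
                if (!intr_armed && nfree <= THRESH) {
                        intr_armed = true;
                        printf("interrupt armed after %d frames "
                            "(%d slots free)\n", sent, nfree);
                }
        }
        return (0);
}

Even once armed, the real code asks the host to postpone the interrupt via virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG), so it only fires after a large share of the outstanding descriptors have completed.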
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index 23c31f4..7249a3e 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -149,6 +149,7 @@ struct vtnet_softc {
int vtnet_rx_nmbufs;
int vtnet_rx_clsize;
int vtnet_rx_new_clsize;
+ int vtnet_tx_intr_thresh;
int vtnet_tx_nsegs;
int vtnet_if_flags;
int vtnet_act_vq_pairs;
@@ -183,6 +184,14 @@ struct vtnet_softc {
#define VTNET_INTR_DISABLE_RETRIES 4
/*
+ * Similarly, additional completed entries can appear in a virtqueue
+ * between when it was last checked and when the host is notified. Number
+ * of times to retry before scheduling the taskqueue to process the
+ * queue.
+ */
+#define VTNET_NOTIFY_RETRIES 4
+
+/*
* Fake the media type. The host does not provide us with any real media
* information.
*/