-rw-r--r--  sys/dev/virtio/network/if_vtnet.c     | 211
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h  |   9
2 files changed, 155 insertions, 65 deletions
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 17630b9..6d1f13f 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -126,6 +126,8 @@ static int vtnet_rxq_eof(struct vtnet_rxq *);
static void vtnet_rx_vq_intr(void *);
static void vtnet_rxq_tq_intr(void *, int);
+static int vtnet_txq_below_threshold(struct vtnet_txq *);
+static int vtnet_txq_notify(struct vtnet_txq *);
static void vtnet_txq_free_mbufs(struct vtnet_txq *);
static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
int *, int *, int *);
@@ -147,7 +149,7 @@ static void vtnet_txq_tq_deferred(void *, int);
#endif
static void vtnet_txq_start(struct vtnet_txq *);
static void vtnet_txq_tq_intr(void *, int);
-static void vtnet_txq_eof(struct vtnet_txq *);
+static int vtnet_txq_eof(struct vtnet_txq *);
static void vtnet_tx_vq_intr(void *);
static void vtnet_tx_start_all(struct vtnet_softc *);
@@ -204,6 +206,8 @@ static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void vtnet_get_hwaddr(struct vtnet_softc *);
static void vtnet_set_hwaddr(struct vtnet_softc *);
static void vtnet_vlan_tag_remove(struct mbuf *);
+static void vtnet_set_rx_process_limit(struct vtnet_softc *);
+static void vtnet_set_tx_intr_threshold(struct vtnet_softc *);
static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
struct sysctl_oid_list *, struct vtnet_rxq *);
@@ -239,19 +243,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
-/*
- * Reducing the number of transmit completed interrupts can improve
- * performance. To do so, the define below keeps the Tx vq interrupt
- * disabled and adds calls to vtnet_txeof() in the start and watchdog
- * paths. The price to pay for this is the m_free'ing of transmitted
- * mbufs may be delayed until the watchdog fires.
- *
- * BMV: Reintroduce this later as a run-time option, if it makes
- * sense after the EVENT_IDX feature is supported.
- *
- * #define VTNET_TX_INTR_MODERATION
- */
-
static uma_zone_t vtnet_tx_header_zone;
static struct virtio_feature_desc vtnet_feature_desc[] = {
@@ -901,7 +892,6 @@ vtnet_setup_interface(struct vtnet_softc *sc)
{
device_t dev;
struct ifnet *ifp;
- int limit;
dev = sc->vtnet_dev;
@@ -1000,11 +990,8 @@ vtnet_setup_interface(struct vtnet_softc *sc)
vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
}
- limit = vtnet_tunable_int(sc, "rx_process_limit",
- vtnet_rx_process_limit);
- if (limit < 0)
- limit = INT_MAX;
- sc->vtnet_rx_process_limit = limit;
+ vtnet_set_rx_process_limit(sc);
+ vtnet_set_tx_intr_threshold(sc);
return (0);
}
@@ -1895,6 +1882,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pending)
VTNET_RXQ_UNLOCK(rxq);
}
+static int
+vtnet_txq_below_threshold(struct vtnet_txq *txq)
+{
+ struct vtnet_softc *sc;
+ struct virtqueue *vq;
+
+ sc = txq->vtntx_sc;
+ vq = txq->vtntx_vq;
+
+ return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
+}
+
+static int
+vtnet_txq_notify(struct vtnet_txq *txq)
+{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ virtqueue_notify(vq);
+
+ if (vtnet_txq_enable_intr(txq) == 0)
+ return (0);
+
+ /*
+ * Drain frames that were completed since last checked. If this
+ * causes the queue to go above the threshold, the caller should
+ * continue transmitting.
+ */
+ if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
+ virtqueue_disable_intr(vq);
+ return (1);
+ }
+
+ return (0);
+}
+
static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
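
A rough userland sketch of the notify-then-recheck pattern vtnet_txq_notify() implements above; the fake_ring structure and the ring_*() helpers are hypothetical stand-ins, not VirtIO or driver API. The point is the ordering: re-enable the interrupt first, then drain once more, and keep the interrupt off only if that final drain found completions and still left the ring with plenty of free slots.

#include <stdbool.h>
#include <stdio.h>

struct fake_ring {
        int     nfree;          /* free descriptors */
        int     completed;      /* completions not yet drained */
        bool    intr_on;        /* completion interrupt armed? */
};

/* Reclaim completed slots; returns how many were drained. */
static int
ring_drain(struct fake_ring *r)
{
        int n = r->completed;

        r->completed = 0;
        r->nfree += n;
        return (n);
}

static bool
ring_below_threshold(const struct fake_ring *r, int thresh)
{
        return (r->nfree <= thresh);
}

/* Returns nonzero when the caller should keep transmitting. */
static int
ring_notify(struct fake_ring *r, int thresh)
{
        /* The driver kicks the host here with virtqueue_notify(). */
        r->intr_on = true;

        /*
         * Completions may have landed between the last drain and the
         * re-enable above.  If draining them leaves the ring with
         * plenty of free slots, keep the interrupt off and let the
         * caller continue filling the ring.
         */
        if (ring_drain(r) != 0 && !ring_below_threshold(r, thresh)) {
                r->intr_on = false;
                return (1);
        }
        return (0);
}

int
main(void)
{
        struct fake_ring r = { .nfree = 100, .completed = 8, .intr_on = false };

        printf("keep transmitting: %d\n", ring_notify(&r, 256 / 4));
        return (0);
}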
@@ -2169,11 +2194,11 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
struct vtnet_softc *sc;
struct virtqueue *vq;
struct mbuf *m0;
- int enq;
+ int tries, enq;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
- enq = 0;
+ tries = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2183,6 +2208,9 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
if (virtqueue_full(vq))
break;
@@ -2201,9 +2229,12 @@ vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
ETHER_BPF_MTAP(ifp, m0);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
}
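
The bounded retry in vtnet_start_locked() above (and again in vtnet_txq_mq_start_locked() below) reduces to the following compilable sketch; transmit_some() and notify_says_more_work() are invented for the demo, and only the control flow mirrors the driver.

#include <stdio.h>

#define NOTIFY_RETRIES  4       /* mirrors VTNET_NOTIFY_RETRIES */

/* Pretend the ring only has room for 16 frames per pass. */
static int
transmit_some(int *pending)
{
        int enq = (*pending > 16) ? 16 : *pending;

        *pending -= enq;
        return (enq);
}

/* Pretend completions keep showing up for the first two kicks. */
static int
notify_says_more_work(int pass)
{
        return (pass < 2);
}

static void
start_locked(int pending)
{
        int tries, enq;

        tries = 0;
again:
        enq = transmit_some(&pending);

        if (enq > 0 && notify_says_more_work(tries) != 0) {
                if (tries++ < NOTIFY_RETRIES) {
                        printf("retrying, pass %d\n", tries);
                        goto again;
                }
                printf("giving up, deferring to the taskqueue\n");
        }
}

int
main(void)
{
        start_locked(40);
        return (0);
}

Bounding the retries matters because each pass can race with the host again; after VTNET_NOTIFY_RETRIES losses the remaining work is handed to the taskqueue rather than spinning under the queue lock.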
@@ -2230,13 +2261,13 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
struct virtqueue *vq;
struct buf_ring *br;
struct ifnet *ifp;
- int enq, error;
+ int enq, tries, error;
sc = txq->vtntx_sc;
vq = txq->vtntx_vq;
br = txq->vtntx_br;
ifp = sc->vtnet_ifp;
- enq = 0;
+ tries = 0;
error = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2256,14 +2287,16 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
vtnet_txq_eof(txq);
+again:
+ enq = 0;
+
while ((m = drbr_peek(ifp, br)) != NULL) {
if (virtqueue_full(vq)) {
drbr_putback(ifp, br, m);
break;
}
- error = vtnet_txq_encap(txq, &m);
- if (error) {
+ if (vtnet_txq_encap(txq, &m) != 0) {
if (m != NULL)
drbr_putback(ifp, br, m);
else
@@ -2276,9 +2309,12 @@ vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
ETHER_BPF_MTAP(ifp, m);
}
- if (enq > 0) {
- virtqueue_notify(vq);
- txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+ if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+ if (tries++ < VTNET_NOTIFY_RETRIES)
+ goto again;
+
+ txq->vtntx_stats.vtxs_rescheduled++;
+ taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
}
return (0);
@@ -2366,30 +2402,26 @@ vtnet_txq_tq_intr(void *xtxq, int pending)
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- }
-
VTNET_TXQ_UNLOCK(txq);
}
-static void
+static int
vtnet_txq_eof(struct vtnet_txq *txq)
{
struct virtqueue *vq;
struct vtnet_tx_header *txhdr;
struct mbuf *m;
+ int deq;
vq = txq->vtntx_vq;
+ deq = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
m = txhdr->vth_mbuf;
+ deq++;
txq->vtntx_stats.vtxs_opackets++;
txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
@@ -2402,6 +2434,8 @@ vtnet_txq_eof(struct vtnet_txq *txq)
if (virtqueue_empty(vq))
txq->vtntx_watchdog = 0;
+
+ return (deq);
}
static void
@@ -2410,12 +2444,10 @@ vtnet_tx_vq_intr(void *xtxq)
struct vtnet_softc *sc;
struct vtnet_txq *txq;
struct ifnet *ifp;
- int tries;
txq = xtxq;
sc = txq->vtntx_sc;
ifp = sc->vtnet_ifp;
- tries = 0;
if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
/*
@@ -2430,30 +2462,15 @@ vtnet_tx_vq_intr(void *xtxq)
VTNET_TXQ_LOCK(txq);
-again:
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
VTNET_TXQ_UNLOCK(txq);
return;
}
vtnet_txq_eof(txq);
-
vtnet_txq_start(txq);
- if (vtnet_txq_enable_intr(txq) != 0) {
- vtnet_txq_disable_intr(txq);
- /*
- * This is an occasional race, so retry a few times
- * before scheduling the taskqueue.
- */
- if (tries++ < VTNET_INTR_DISABLE_RETRIES)
- goto again;
-
- VTNET_TXQ_UNLOCK(txq);
- txq->vtntx_stats.vtxs_rescheduled++;
- taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
- } else
- VTNET_TXQ_UNLOCK(txq);
+ VTNET_TXQ_UNLOCK(txq);
}
static void
@@ -2500,21 +2517,31 @@ vtnet_qflush(struct ifnet *ifp)
static int
vtnet_watchdog(struct vtnet_txq *txq)
{
- struct vtnet_softc *sc;
+ struct ifnet *ifp;
- sc = txq->vtntx_sc;
+ ifp = txq->vtntx_sc->vtnet_ifp;
VTNET_TXQ_LOCK(txq);
- if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
- vtnet_txq_eof(txq);
+ if (txq->vtntx_watchdog == 1) {
+ /*
+ * Only drain completed frames if the watchdog is about to
+ * expire. If any frames were drained, there may be enough
+ * free descriptors now available to transmit queued frames.
+ * In that case, the timer will immediately be decremented
+ * below, but the timeout is generous enough that should not
+ * be a problem.
+ */
+ if (vtnet_txq_eof(txq) != 0)
+ vtnet_txq_start(txq);
+ }
+
if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
VTNET_TXQ_UNLOCK(txq);
return (0);
}
VTNET_TXQ_UNLOCK(txq);
- if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
- txq->vtntx_id);
+ if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
return (1);
}
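
Read on its own, the reworked vtnet_watchdog() above amounts to a per-queue countdown. The sketch below models it with invented fake_txq/drain_completed() helpers and a queue whose host never completes anything, so the timer runs out.

#include <stdio.h>

#define TX_TIMEOUT      5       /* mirrors VTNET_TX_TIMEOUT, in ticks */

struct fake_txq {
        int     watchdog;       /* 0 = idle, otherwise ticks remaining */
        int     completed;      /* completions waiting to be drained */
};

static int
drain_completed(struct fake_txq *txq)
{
        int n = txq->completed;

        txq->completed = 0;
        return (n);
}

/* Called once per tick; returns 1 when the queue is declared hung. */
static int
watchdog_tick(struct fake_txq *txq)
{
        if (txq->watchdog == 1) {
                /*
                 * About to expire: drain one last time.  In the driver a
                 * successful drain restarts transmission, which re-arms
                 * the timer via the notify path; model that with a reset.
                 */
                if (drain_completed(txq) != 0)
                        txq->watchdog = TX_TIMEOUT;
        }

        if (txq->watchdog == 0 || --txq->watchdog)
                return (0);

        return (1);
}

int
main(void)
{
        struct fake_txq txq = { .watchdog = TX_TIMEOUT, .completed = 0 };
        int tick = 0;

        while (watchdog_tick(&txq) == 0)
                tick++;
        printf("queue declared hung at tick %d\n", tick + 1);
        return (0);
}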
@@ -3564,6 +3591,50 @@ vtnet_vlan_tag_remove(struct mbuf *m)
}
static void
+vtnet_set_rx_process_limit(struct vtnet_softc *sc)
+{
+ int limit;
+
+ limit = vtnet_tunable_int(sc, "rx_process_limit",
+ vtnet_rx_process_limit);
+ if (limit < 0)
+ limit = INT_MAX;
+ sc->vtnet_rx_process_limit = limit;
+}
+
+static void
+vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
+{
+ device_t dev;
+ int size, thresh;
+
+ dev = sc->vtnet_dev;
+ size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
+
+ /*
+ * The Tx interrupt is disabled until the queue free count falls
+ * below our threshold. Completed frames are drained from the Tx
+ * virtqueue before transmitting new frames and in the watchdog
+ * callout, so the frequency of Tx interrupts is greatly reduced,
+ * at the cost of not freeing mbufs as quickly as they otherwise
+ * would be.
+ *
+ * N.B. We assume all the Tx queues are the same size.
+ */
+ thresh = size / 4;
+
+ /*
+ * Without indirect descriptors, leave enough room for the most
+ * segments we handle.
+ */
+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 &&
+ thresh < sc->vtnet_tx_nsegs)
+ thresh = sc->vtnet_tx_nsegs;
+
+ sc->vtnet_tx_intr_thresh = thresh;
+}
+
+static void
vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
{
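
A worked example of the threshold arithmetic in vtnet_set_tx_intr_threshold() above. The 256- and 64-entry queue sizes and the 35-segment limit are plausible sample values only, not anything read from a device.

#include <stdbool.h>
#include <stdio.h>

/* Same arithmetic as vtnet_set_tx_intr_threshold() above. */
static int
tx_intr_thresh(int vq_size, int tx_nsegs, bool indirect)
{
        int thresh = vq_size / 4;

        /*
         * Without indirect descriptors every segment of a frame occupies
         * its own descriptor, so make sure the interrupt is re-armed
         * while at least one worst-case frame still fits.
         */
        if (!indirect && thresh < tx_nsegs)
                thresh = tx_nsegs;
        return (thresh);
}

int
main(void)
{
        /* 256-entry queue: interrupt re-armed once <= 64 slots are free. */
        printf("%d\n", tx_intr_thresh(256, 35, true));

        /* Small 64-entry queue, no indirect descriptors: bumped to 35. */
        printf("%d\n", tx_intr_thresh(64, 35, false));
        return (0);
}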
@@ -3758,8 +3829,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
+ struct virtqueue *vq;
+
+ vq = txq->vtntx_vq;
+
+ if (vtnet_txq_below_threshold(txq) != 0)
+ return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
- return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+ /*
+ * The free count is above our threshold. Keep the Tx interrupt
+ * disabled until the queue is fuller.
+ */
+ return (0);
}
static void
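
Putting the pieces together, the toy loop below shows the effect of the threshold gate in vtnet_txq_enable_intr() above: the completion interrupt stays off while the ring is mostly free and is only armed once the free count drops to one quarter of the ring. RING_SIZE, the burst size and the loop are purely illustrative.

#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE       256
#define THRESH          (RING_SIZE / 4)

int
main(void)
{
        int nfree = RING_SIZE, sent = 0;
        bool intr_armed = false;

        while (nfree > 0) {
                nfree -= 8;             /* enqueue a burst of 8 frames */
                sent += 8;

                /* The vtnet_txq_enable_intr() gate: arm only when low. */
                if (!intr_armed && nfree <= THRESH) {
                        intr_armed = true;
                        printf("interrupt armed after %d frames "
                            "(%d slots free)\n", sent, nfree);
                }
        }
        return (0);
}

Even once armed, the real code asks the host to postpone the interrupt via virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG), so it only fires after a large share of the outstanding descriptors have completed.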
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index 23c31f4..7249a3e 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -149,6 +149,7 @@ struct vtnet_softc {
int vtnet_rx_nmbufs;
int vtnet_rx_clsize;
int vtnet_rx_new_clsize;
+ int vtnet_tx_intr_thresh;
int vtnet_tx_nsegs;
int vtnet_if_flags;
int vtnet_act_vq_pairs;
@@ -183,6 +184,14 @@ struct vtnet_softc {
#define VTNET_INTR_DISABLE_RETRIES 4
/*
+ * Similarly, additional completed entries can appear in a virtqueue
+ * between when it was last checked and when the host is notified. Number
+ * of times to retry before scheduling the taskqueue to process the
+ * queue.
+ */
+#define VTNET_NOTIFY_RETRIES 4
+
+/*
* Fake the media type. The host does not provide us with any real media
* information.
*/