summaryrefslogtreecommitdiffstats
path: root/sys/dev
diff options
context:
space:
mode:
authoradrian <adrian@FreeBSD.org>2013-05-21 18:13:57 +0000
committeradrian <adrian@FreeBSD.org>2013-05-21 18:13:57 +0000
commite1aea355e7d97b836dca0e78673e95e0361ccd6b (patch)
tree3828059c54a8475bebeb15e51a0c0f27823143ff /sys/dev
parentc283b0b0aedb8f9857faaa84acf02ecc01957958 (diff)
downloadFreeBSD-src-e1aea355e7d97b836dca0e78673e95e0361ccd6b.zip
FreeBSD-src-e1aea355e7d97b836dca0e78673e95e0361ccd6b.tar.gz
Implement a separate hardware queue threshold for aggregate and non-aggregate
traffic. When transmitting non-aggregate traffic, we need to keep the hardware busy whilst transmitting or small bursts in txdone/tx latency will kill us. This restores non-aggregate iperf performance, especially when doing TDMA. Tested: * AR5416<->AR5416, TDMA * AR5416 STA <-> AR9280 AP
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/ath/if_ath.c3
-rw-r--r--sys/dev/ath/if_ath_sysctl.c7
-rw-r--r--sys/dev/ath/if_ath_tx.c53
-rw-r--r--sys/dev/ath/if_ath_tx.h1
-rw-r--r--sys/dev/ath/if_athvar.h13
5 files changed, 65 insertions, 12 deletions
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c
index 6f5d9bf..cd0e749 100644
--- a/sys/dev/ath/if_ath.c
+++ b/sys/dev/ath/if_ath.c
@@ -839,7 +839,8 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
/*
* Initial aggregation settings.
*/
- sc->sc_hwq_limit = ATH_AGGR_MIN_QDEPTH;
+ sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH;
+ sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH;
sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW;
sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH;
sc->sc_aggr_limit = ATH_AGGR_MAXSIZE;
diff --git a/sys/dev/ath/if_ath_sysctl.c b/sys/dev/ath/if_ath_sysctl.c
index 01ad3c6..0a5719a 100644
--- a/sys/dev/ath/if_ath_sysctl.c
+++ b/sys/dev/ath/if_ath_sysctl.c
@@ -722,8 +722,11 @@ ath_sysctlattach(struct ath_softc *sc)
"mask of error frames to pass when monitoring");
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "hwq_limit", CTLFLAG_RW, &sc->sc_hwq_limit, 0,
- "Hardware queue depth before software-queuing TX frames");
+ "hwq_limit_nonaggr", CTLFLAG_RW, &sc->sc_hwq_limit_nonaggr, 0,
+ "Hardware non-AMPDU queue depth before software-queuing TX frames");
+ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+ "hwq_limit_aggr", CTLFLAG_RW, &sc->sc_hwq_limit_aggr, 0,
+ "Hardware AMPDU queue depth before software-queuing TX frames");
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"tid_hwq_lo", CTLFLAG_RW, &sc->sc_tid_hwq_lo, 0,
"");
diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c
index aed9825..b8b45fa 100644
--- a/sys/dev/ath/if_ath_tx.c
+++ b/sys/dev/ath/if_ath_tx.c
@@ -3108,9 +3108,15 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
* the head frame in the list. Don't schedule the
* TID - let it build some more frames first?
*
+ * When running A-MPDU, always just check the hardware
+ * queue depth against the aggregate frame limit.
+ * We don't want to burst a large number of single frames
+ * out to the hardware; we want to aggressively hold back.
+ *
* Otherwise, schedule the TID.
*/
- if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
+ /* XXX TXQ locking */
+ if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) {
bf = ATH_TID_FIRST(atid);
ATH_TID_REMOVE(atid, bf, bf_list);
@@ -3134,7 +3140,22 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
ath_tx_tid_sched(sc, atid);
}
- } else if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
+ /*
+ * If we're not doing A-MPDU, be prepared to direct dispatch
+ * up to both limits if possible. This particular corner
+ * case may end up with packet starvation between aggregate
+ * traffic and non-aggregate traffic: we wnat to ensure
+ * that non-aggregate stations get a few frames queued to the
+ * hardware before the aggregate station(s) get their chance.
+ *
+ * So if you only ever see a couple of frames direct dispatched
+ * to the hardware from a non-AMPDU client, check both here
+ * and in the software queue dispatcher to ensure that those
+ * non-AMPDU stations get a fair chance to transmit.
+ */
+ /* XXX TXQ locking */
+ } else if ((txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_nonaggr) &&
+ (txq->axq_aggr_depth < sc->sc_hwq_limit_aggr)) {
/* AMPDU not running, attempt direct dispatch */
DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
/* See if clrdmask needs to be set */
@@ -5339,7 +5360,8 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
*
* XXX locking on txq here?
*/
- if (txq->axq_aggr_depth >= sc->sc_hwq_limit ||
+ /* XXX TXQ locking */
+ if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr ||
(status == ATH_AGGR_BAW_CLOSED ||
status == ATH_AGGR_LEAK_CLOSED))
break;
@@ -5348,6 +5370,15 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
/*
* Schedule some packets from the given node/TID to the hardware.
+ *
+ * XXX TODO: this routine doesn't enforce the maximum TXQ depth.
+ * It just dumps frames into the TXQ. We should limit how deep
+ * the transmit queue can grow for frames dispatched to the given
+ * TXQ.
+ *
+ * To avoid locking issues, either we need to own the TXQ lock
+ * at this point, or we need to pass in the maximum frame count
+ * from the caller.
*/
void
ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an,
@@ -5452,8 +5483,16 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
* Don't schedule if the hardware queue is busy.
* This (hopefully) gives some more time to aggregate
* some packets in the aggregation queue.
+ *
+ * XXX It doesn't stop a parallel sender from sneaking
+ * in transmitting a frame!
*/
- if (txq->axq_aggr_depth >= sc->sc_hwq_limit) {
+ /* XXX TXQ locking */
+ if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
+ sc->sc_aggr_stats.aggr_sched_nopkt++;
+ return;
+ }
+ if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
sc->sc_aggr_stats.aggr_sched_nopkt++;
return;
}
@@ -5489,7 +5528,11 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
* packets. If we aren't running aggregation then
* we should still limit the hardware queue depth.
*/
- if (txq->axq_depth >= sc->sc_hwq_limit) {
+ /* XXX TXQ locking */
+ if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
+ break;
+ }
+ if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
break;
}
diff --git a/sys/dev/ath/if_ath_tx.h b/sys/dev/ath/if_ath_tx.h
index 8f3785f..281dbcb 100644
--- a/sys/dev/ath/if_ath_tx.h
+++ b/sys/dev/ath/if_ath_tx.h
@@ -47,6 +47,7 @@
* How 'busy' to try and keep the hardware txq
*/
#define ATH_AGGR_MIN_QDEPTH 2
+#define ATH_NONAGGR_MIN_QDEPTH 32
/*
* Watermark for scheduling TIDs in order to maximise aggregation.
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index f7d262e..092c61e 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -814,16 +814,21 @@ struct ath_softc {
int sc_txq_node_psq_maxdepth;
/*
- * Aggregation twiddles
+ * Software queue twiddles
*
- * hwq_limit: how busy to keep the hardware queue - don't schedule
- * further packets to the hardware, regardless of the TID
+ * hwq_limit_nonaggr:
+ * when to begin limiting non-aggregate frames to the
+ * hardware queue, regardless of the TID.
+ * hwq_limit_aggr:
+ * when to begin limiting A-MPDU frames to the
+ * hardware queue, regardless of the TID.
* tid_hwq_lo: how low the per-TID hwq count has to be before the
* TID will be scheduled again
* tid_hwq_hi: how many frames to queue to the HWQ before the TID
* stops being scheduled.
*/
- int sc_hwq_limit;
+ int sc_hwq_limit_nonaggr;
+ int sc_hwq_limit_aggr;
int sc_tid_hwq_lo;
int sc_tid_hwq_hi;
OpenPOWER on IntegriCloud