diff options
author | adrian <adrian@FreeBSD.org> | 2013-05-21 18:13:57 +0000 |
---|---|---|
committer | adrian <adrian@FreeBSD.org> | 2013-05-21 18:13:57 +0000 |
commit | e1aea355e7d97b836dca0e78673e95e0361ccd6b (patch) | |
tree | 3828059c54a8475bebeb15e51a0c0f27823143ff /sys/dev | |
parent | c283b0b0aedb8f9857faaa84acf02ecc01957958 (diff) | |
download | FreeBSD-src-e1aea355e7d97b836dca0e78673e95e0361ccd6b.zip FreeBSD-src-e1aea355e7d97b836dca0e78673e95e0361ccd6b.tar.gz |
Implement a separate hardware queue threshold for aggregate and non-aggr
traffic.
When transmitting non-aggregate traffic, we need to keep the hardware
busy whilst transmitting or small bursts in txdone/tx latency will
kill us.
This restores non-aggregate iperf performance, especially when doing
TDMA.
Tested:
* AR5416<->AR5416, TDMA
* AR5416 STA <-> AR9280 AP
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/ath/if_ath.c | 3 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_sysctl.c | 7 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx.c | 53 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx.h | 1 | ||||
-rw-r--r-- | sys/dev/ath/if_athvar.h | 13 |
5 files changed, 65 insertions, 12 deletions
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index 6f5d9bf..cd0e749 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -839,7 +839,8 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) /* * Initial aggregation settings. */ - sc->sc_hwq_limit = ATH_AGGR_MIN_QDEPTH; + sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH; + sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH; sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW; sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH; sc->sc_aggr_limit = ATH_AGGR_MAXSIZE; diff --git a/sys/dev/ath/if_ath_sysctl.c b/sys/dev/ath/if_ath_sysctl.c index 01ad3c6..0a5719a 100644 --- a/sys/dev/ath/if_ath_sysctl.c +++ b/sys/dev/ath/if_ath_sysctl.c @@ -722,8 +722,11 @@ ath_sysctlattach(struct ath_softc *sc) "mask of error frames to pass when monitoring"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "hwq_limit", CTLFLAG_RW, &sc->sc_hwq_limit, 0, - "Hardware queue depth before software-queuing TX frames"); + "hwq_limit_nonaggr", CTLFLAG_RW, &sc->sc_hwq_limit_nonaggr, 0, + "Hardware non-AMPDU queue depth before software-queuing TX frames"); + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, + "hwq_limit_aggr", CTLFLAG_RW, &sc->sc_hwq_limit_aggr, 0, + "Hardware AMPDU queue depth before software-queuing TX frames"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tid_hwq_lo", CTLFLAG_RW, &sc->sc_tid_hwq_lo, 0, ""); diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c index aed9825..b8b45fa 100644 --- a/sys/dev/ath/if_ath_tx.c +++ b/sys/dev/ath/if_ath_tx.c @@ -3108,9 +3108,15 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, * the head frame in the list. Don't schedule the * TID - let it build some more frames first? * + * When running A-MPDU, always just check the hardware + * queue depth against the aggregate frame limit. + * We don't want to burst a large number of single frames + * out to the hardware; we want to aggressively hold back. + * * Otherwise, schedule the TID. 
*/ - if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) { + /* XXX TXQ locking */ + if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) { bf = ATH_TID_FIRST(atid); ATH_TID_REMOVE(atid, bf, bf_list); @@ -3134,7 +3140,22 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, ath_tx_tid_sched(sc, atid); } - } else if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) { + /* + * If we're not doing A-MPDU, be prepared to direct dispatch + * up to both limits if possible. This particular corner + * case may end up with packet starvation between aggregate + * traffic and non-aggregate traffic: we want to ensure + * that non-aggregate stations get a few frames queued to the + * hardware before the aggregate station(s) get their chance. + * + * So if you only ever see a couple of frames direct dispatched + * to the hardware from a non-AMPDU client, check both here + * and in the software queue dispatcher to ensure that those + * non-AMPDU stations get a fair chance to transmit. + */ + /* XXX TXQ locking */ + } else if ((txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_nonaggr) && + (txq->axq_aggr_depth < sc->sc_hwq_limit_aggr)) { /* AMPDU not running, attempt direct dispatch */ DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__); /* See if clrdmask needs to be set */ @@ -5339,7 +5360,8 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an, * * XXX locking on txq here? */ - if (txq->axq_aggr_depth >= sc->sc_hwq_limit || + /* XXX TXQ locking */ + if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr || (status == ATH_AGGR_BAW_CLOSED || status == ATH_AGGR_LEAK_CLOSED)) break; @@ -5348,6 +5370,15 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an, /* * Schedule some packets from the given node/TID to the hardware. + * + * XXX TODO: this routine doesn't enforce the maximum TXQ depth. + * It just dumps frames into the TXQ. 
We should limit how deep + * the transmit queue can grow for frames dispatched to the given + * TXQ. + * + * To avoid locking issues, either we need to own the TXQ lock + * at this point, or we need to pass in the maximum frame count + * from the caller. */ void ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an, @@ -5452,8 +5483,16 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq) * Don't schedule if the hardware queue is busy. * This (hopefully) gives some more time to aggregate * some packets in the aggregation queue. + * + * XXX It doesn't stop a parallel sender from sneaking + * in transmitting a frame! */ - if (txq->axq_aggr_depth >= sc->sc_hwq_limit) { + /* XXX TXQ locking */ + if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) { + sc->sc_aggr_stats.aggr_sched_nopkt++; + return; + } + if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) { sc->sc_aggr_stats.aggr_sched_nopkt++; return; } @@ -5489,7 +5528,11 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq) * packets. If we aren't running aggregation then * we should still limit the hardware queue depth. */ - if (txq->axq_depth >= sc->sc_hwq_limit) { + /* XXX TXQ locking */ + if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) { + break; + } + if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) { break; } diff --git a/sys/dev/ath/if_ath_tx.h b/sys/dev/ath/if_ath_tx.h index 8f3785f..281dbcb 100644 --- a/sys/dev/ath/if_ath_tx.h +++ b/sys/dev/ath/if_ath_tx.h @@ -47,6 +47,7 @@ * How 'busy' to try and keep the hardware txq */ #define ATH_AGGR_MIN_QDEPTH 2 +#define ATH_NONAGGR_MIN_QDEPTH 32 /* * Watermark for scheduling TIDs in order to maximise aggregation. 
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h index f7d262e..092c61e 100644 --- a/sys/dev/ath/if_athvar.h +++ b/sys/dev/ath/if_athvar.h @@ -814,16 +814,21 @@ struct ath_softc { int sc_txq_node_psq_maxdepth; /* - * Aggregation twiddles + * Software queue twiddles * - * hwq_limit: how busy to keep the hardware queue - don't schedule - * further packets to the hardware, regardless of the TID + * hwq_limit_nonaggr: + * when to begin limiting non-aggregate frames to the + * hardware queue, regardless of the TID. + * hwq_limit_aggr: + * when to begin limiting A-MPDU frames to the + * hardware queue, regardless of the TID. * tid_hwq_lo: how low the per-TID hwq count has to be before the * TID will be scheduled again * tid_hwq_hi: how many frames to queue to the HWQ before the TID * stops being scheduled. */ - int sc_hwq_limit; + int sc_hwq_limit_nonaggr; + int sc_hwq_limit_aggr; int sc_tid_hwq_lo; int sc_tid_hwq_hi; |