diff options
-rw-r--r-- | sys/dev/ath/ath_rate/amrr/amrr.c | 24 | ||||
-rw-r--r-- | sys/dev/ath/ath_rate/onoe/onoe.c | 24 | ||||
-rw-r--r-- | sys/dev/ath/ath_rate/sample/sample.c | 510 | ||||
-rw-r--r-- | sys/dev/ath/ath_rate/sample/sample.h | 2 | ||||
-rw-r--r-- | sys/dev/ath/if_ath.c | 62 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_misc.h | 3 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx.c | 3126 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx.h | 65 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx_ht.c | 616 | ||||
-rw-r--r-- | sys/dev/ath/if_ath_tx_ht.h | 29 | ||||
-rw-r--r-- | sys/dev/ath/if_athrate.h | 8 |
11 files changed, 4114 insertions, 355 deletions
diff --git a/sys/dev/ath/ath_rate/amrr/amrr.c b/sys/dev/ath/ath_rate/amrr/amrr.c index 5fee76e..b10b826 100644 --- a/sys/dev/ath/ath_rate/amrr/amrr.c +++ b/sys/dev/ath/ath_rate/amrr/amrr.c @@ -122,19 +122,21 @@ ath_rate_findrate(struct ath_softc *sc, struct ath_node *an, */ void ath_rate_getxtxrates(struct ath_softc *sc, struct ath_node *an, - uint8_t rix0, uint8_t *rix, uint8_t *try) + uint8_t rix0, struct ath_rc_series *rc) { struct amrr_node *amn = ATH_NODE_AMRR(an); -/* rix[0] = amn->amn_tx_rate0; */ - rix[1] = amn->amn_tx_rate1; - rix[2] = amn->amn_tx_rate2; - rix[3] = amn->amn_tx_rate3; + rc[0].flags = rc[1].flags = rc[2].flags = rc[3].flags = 0; - try[0] = amn->amn_tx_try0; - try[1] = amn->amn_tx_try1; - try[2] = amn->amn_tx_try2; - try[3] = amn->amn_tx_try3; + rc[0].rix = amn->amn_tx_rate0; + rc[1].rix = amn->amn_tx_rate1; + rc[2].rix = amn->amn_tx_rate2; + rc[3].rix = amn->amn_tx_rate3; + + rc[0].tries = amn->amn_tx_try0; + rc[1].tries = amn->amn_tx_try1; + rc[2].tries = amn->amn_tx_try2; + rc[3].tries = amn->amn_tx_try3; } @@ -153,10 +155,10 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an, void ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, - const struct ath_buf *bf) + const struct ath_rc_series *rc, const struct ath_tx_status *ts, + int frame_size, int nframes, int nbad) { struct amrr_node *amn = ATH_NODE_AMRR(an); - const struct ath_tx_status *ts = &bf->bf_status.ds_txstat; int sr = ts->ts_shortretry; int lr = ts->ts_longretry; int retry_count = sr + lr; diff --git a/sys/dev/ath/ath_rate/onoe/onoe.c b/sys/dev/ath/ath_rate/onoe/onoe.c index 77236ad..b5e2c2d 100644 --- a/sys/dev/ath/ath_rate/onoe/onoe.c +++ b/sys/dev/ath/ath_rate/onoe/onoe.c @@ -130,19 +130,21 @@ ath_rate_findrate(struct ath_softc *sc, struct ath_node *an, */ void ath_rate_getxtxrates(struct ath_softc *sc, struct ath_node *an, - uint8_t rix0, uint8_t *rix, uint8_t *try) + uint8_t rix0, struct ath_rc_series *rc) { struct onoe_node *on = 
ATH_NODE_ONOE(an); -/* rix[0] = on->on_tx_rate0; */ - rix[1] = on->on_tx_rate1; - rix[2] = on->on_tx_rate2; - rix[3] = on->on_tx_rate3; + rc[0].flags = rc[1].flags = rc[2].flags = rc[3].flags = 0; - try[0] = on->on_tx_try0; - try[1] = 2; - try[2] = 2; - try[3] = 2; + rc[0].rix = on->on_tx_rate0; + rc[1].rix = on->on_tx_rate1; + rc[2].rix = on->on_tx_rate2; + rc[3].rix = on->on_tx_rate3; + + rc[0].tries = on->on_tx_try0; + rc[1].tries = 2; + rc[2].tries = 2; + rc[3].tries = 2; } void @@ -160,10 +162,10 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an, void ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, - const struct ath_buf *bf) + const struct ath_rc_series *rc, const struct ath_tx_status *ts, + int frame_size, int nframes, int nbad) { struct onoe_node *on = ATH_NODE_ONOE(an); - const struct ath_tx_status *ts = &bf->bf_status.ds_txstat; if (ts->ts_status == 0) on->on_tx_ok++; diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c index 028f69a..e639f6c 100644 --- a/sys/dev/ath/ath_rate/sample/sample.c +++ b/sys/dev/ath/ath_rate/sample/sample.c @@ -170,12 +170,13 @@ pick_best_rate(struct ath_node *an, const HAL_RATE_TABLE *rt, int size_bin, int require_acked_before) { struct sample_node *sn = ATH_NODE_SAMPLE(an); - int best_rate_rix, best_rate_tt; + int best_rate_rix, best_rate_tt, best_rate_pct; uint32_t mask; - int rix, tt; + int rix, tt, pct; best_rate_rix = 0; best_rate_tt = 0; + best_rate_pct = 0; for (mask = sn->ratemask, rix = 0; mask != 0; mask >>= 1, rix++) { if ((mask & 1) == 0) /* not a supported rate */ continue; @@ -192,13 +193,54 @@ pick_best_rate(struct ath_node *an, const HAL_RATE_TABLE *rt, !sn->stats[size_bin][rix].packets_acked)) continue; + /* Calculate percentage if possible */ + if (sn->stats[size_bin][rix].total_packets > 0) { + pct = sn->stats[size_bin][rix].ewma_pct; + } else { + /* XXX for now, assume 95% ok */ + pct = 95; + } + /* don't use a bit-rate that has been 
failing */ if (sn->stats[size_bin][rix].successive_failures > 3) continue; - if (best_rate_tt == 0 || tt < best_rate_tt) { - best_rate_tt = tt; - best_rate_rix = rix; + /* + * For HT, Don't use a bit rate that is much more + * lossy than the best. + * + * XXX this isn't optimal; it's just designed to + * eliminate rates that are going to be obviously + * worse. + */ + if (an->an_node.ni_flags & IEEE80211_NODE_HT) { + if (best_rate_pct > (pct + 50)) + continue; + } + + /* + * For non-MCS rates, use the current average txtime for + * comparison. + */ + if (! (an->an_node.ni_flags & IEEE80211_NODE_HT)) { + if (best_rate_tt == 0 || tt <= best_rate_tt) { + best_rate_tt = tt; + best_rate_rix = rix; + best_rate_pct = pct; + } + } + + /* + * Since 2 stream rates have slightly higher TX times, + * allow a little bit of leeway. This should later + * be abstracted out and properly handled. + */ + if (an->an_node.ni_flags & IEEE80211_NODE_HT) { + if (best_rate_tt == 0 || (tt * 8 <= best_rate_tt * 10)) { + best_rate_tt = tt; + best_rate_rix = rix; + best_rate_pct = pct; + } } } return (best_rate_tt ? best_rate_rix : -1); @@ -257,6 +299,28 @@ pick_sample_rate(struct sample_softc *ssc , struct ath_node *an, goto nextrate; } + /* + * When doing aggregation, successive failures don't happen + * as often, as sometimes some of the sub-frames get through. + * + * If the sample rix average tx time is greater than the + * average tx time of the current rix, don't immediately use + * the rate for sampling. + */ + if (an->an_node.ni_flags & IEEE80211_NODE_HT) { + if ((sn->stats[size_bin][rix].average_tx_time * 10 > + sn->stats[size_bin][current_rix].average_tx_time * 9) && + (ticks - sn->stats[size_bin][rix].last_tx < ssc->stale_failure_timeout)) { + mask &= ~(1<<rix); + goto nextrate; + } + } + + /* + * XXX TODO + * For HT, limit sample somehow? + */ + /* Don't sample more than 2 rates higher for rates > 11M for non-HT rates */ if (! 
(an->an_node.ni_flags & IEEE80211_NODE_HT)) { if (DOT11RATE(rix) > 2*11 && rix > current_rix + 2) { @@ -320,6 +384,96 @@ ath_rate_update_static_rix(struct ath_softc *sc, struct ieee80211_node *ni) } } +/* + * Pick a non-HT rate to begin using. + */ +static int +ath_rate_pick_seed_rate_legacy(struct ath_softc *sc, struct ath_node *an, + int frameLen) +{ +#define DOT11RATE(ix) (rt->info[ix].dot11Rate & IEEE80211_RATE_VAL) +#define MCS(ix) (rt->info[ix].dot11Rate | IEEE80211_RATE_MCS) +#define RATE(ix) (DOT11RATE(ix) / 2) + int rix = -1; + const HAL_RATE_TABLE *rt = sc->sc_currates; + struct sample_node *sn = ATH_NODE_SAMPLE(an); + const int size_bin = size_to_bin(frameLen); + + /* no packet has been sent successfully yet */ + for (rix = rt->rateCount-1; rix > 0; rix--) { + if ((sn->ratemask & (1<<rix)) == 0) + continue; + + /* Skip HT rates */ + if (rt->info[rix].phy == IEEE80211_T_HT) + continue; + + /* + * Pick the highest rate <= 36 Mbps + * that hasn't failed. + */ + if (DOT11RATE(rix) <= 72 && + sn->stats[size_bin][rix].successive_failures == 0) { + break; + } + } + return rix; +#undef RATE +#undef MCS +#undef DOT11RATE +} + +/* + * Pick a HT rate to begin using. + * + * Don't use any non-HT rates; only consider HT rates. 
+ */ +static int +ath_rate_pick_seed_rate_ht(struct ath_softc *sc, struct ath_node *an, + int frameLen) +{ +#define DOT11RATE(ix) (rt->info[ix].dot11Rate & IEEE80211_RATE_VAL) +#define MCS(ix) (rt->info[ix].dot11Rate | IEEE80211_RATE_MCS) +#define RATE(ix) (DOT11RATE(ix) / 2) + int rix = -1, ht_rix = -1; + const HAL_RATE_TABLE *rt = sc->sc_currates; + struct sample_node *sn = ATH_NODE_SAMPLE(an); + const int size_bin = size_to_bin(frameLen); + + /* no packet has been sent successfully yet */ + for (rix = rt->rateCount-1; rix > 0; rix--) { + /* Skip rates we can't use */ + if ((sn->ratemask & (1<<rix)) == 0) + continue; + + /* Keep a copy of the last seen HT rate index */ + if (rt->info[rix].phy == IEEE80211_T_HT) + ht_rix = rix; + + /* Skip non-HT rates */ + if (rt->info[rix].phy != IEEE80211_T_HT) + continue; + + /* + * Pick a medium-speed rate regardless of stream count + * which has not seen any failures. Higher rates may fail; + * we'll try them later. + */ + if (((MCS(rix) & 0x7) <= 4) && + sn->stats[size_bin][rix].successive_failures == 0) { + break; + } + } + + /* + * If all the MCS rates have successive failures, rix should be + * > 0; otherwise use the lowest MCS rix (hopefully MCS 0.) 
+ */ + return MAX(rix, ht_rix); +#undef RATE +#undef MCS +#undef DOT11RATE +} void @@ -363,9 +517,14 @@ ath_rate_findrate(struct ath_softc *sc, struct ath_node *an, if (sn->sample_tt[size_bin] < average_tx_time * (sn->packets_since_sample[size_bin]*ssc->sample_rate/100)) { rix = pick_sample_rate(ssc, an, rt, size_bin); IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL, - &an->an_node, "size %u sample rate %d current rate %d", - bin_to_size(size_bin), RATE(rix), - RATE(sn->current_rix[size_bin])); + &an->an_node, "att %d sample_tt %d size %u sample rate %d %s current rate %d %s", + average_tx_time, + sn->sample_tt[size_bin], + bin_to_size(size_bin), + dot11rate(rt, rix), + dot11rate_label(rt, rix), + dot11rate(rt, sn->current_rix[size_bin]), + dot11rate_label(rt, sn->current_rix[size_bin])); if (rix != sn->current_rix[size_bin]) { sn->current_sample_rix[size_bin] = rix; } else { @@ -376,29 +535,58 @@ ath_rate_findrate(struct ath_softc *sc, struct ath_node *an, change_rates = 0; if (!sn->packets_sent[size_bin] || best_rix == -1) { /* no packet has been sent successfully yet */ - for (rix = rt->rateCount-1; rix > 0; rix--) { - if ((sn->ratemask & (1<<rix)) == 0) - continue; - /* - * Pick the highest rate <= 36 Mbps - * that hasn't failed. 
- */ - if (DOT11RATE(rix) <= 72 && - sn->stats[size_bin][rix].successive_failures == 0) { - break; - } - } change_rates = 1; - best_rix = rix; + if (an->an_node.ni_flags & IEEE80211_NODE_HT) + best_rix = + ath_rate_pick_seed_rate_ht(sc, an, frameLen); + else + best_rix = + ath_rate_pick_seed_rate_legacy(sc, an, frameLen); } else if (sn->packets_sent[size_bin] < 20) { /* let the bit-rate switch quickly during the first few packets */ + IEEE80211_NOTE(an->an_node.ni_vap, + IEEE80211_MSG_RATECTL, &an->an_node, + "%s: switching quickly..", __func__); change_rates = 1; } else if (ticks - ssc->min_switch > sn->ticks_since_switch[size_bin]) { /* min_switch seconds have gone by */ + IEEE80211_NOTE(an->an_node.ni_vap, + IEEE80211_MSG_RATECTL, &an->an_node, + "%s: min_switch %d > ticks_since_switch %d..", + __func__, ticks - ssc->min_switch, sn->ticks_since_switch[size_bin]); change_rates = 1; - } else if (2*average_tx_time < sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time) { + } else if ((! (an->an_node.ni_flags & IEEE80211_NODE_HT)) && + (2*average_tx_time < sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time)) { /* the current bit-rate is twice as slow as the best one */ + IEEE80211_NOTE(an->an_node.ni_vap, + IEEE80211_MSG_RATECTL, &an->an_node, + "%s: 2x att (= %d) < cur_rix att %d", + __func__, + 2 * average_tx_time, sn->stats[size_bin][sn->current_rix[size_bin]].average_tx_time); change_rates = 1; + } else if ((an->an_node.ni_flags & IEEE80211_NODE_HT)) { + int cur_rix = sn->current_rix[size_bin]; + int cur_att = sn->stats[size_bin][cur_rix].average_tx_time; + /* + * If the node is HT, upgrade it if the MCS rate is + * higher and the average tx time is within 20% of + * the current rate. It can fail a little. + * + * This is likely not optimal! 
+ */ +#if 0 + printf("cur rix/att %x/%d, best rix/att %x/%d\n", + MCS(cur_rix), cur_att, MCS(best_rix), average_tx_time); +#endif + if ((MCS(best_rix) > MCS(cur_rix)) && + (average_tx_time * 8) <= (cur_att * 10)) { + IEEE80211_NOTE(an->an_node.ni_vap, + IEEE80211_MSG_RATECTL, &an->an_node, + "%s: HT: best_rix 0x%d > cur_rix 0x%x, average_tx_time %d, cur_att %d", + __func__, + MCS(best_rix), MCS(cur_rix), average_tx_time, cur_att); + change_rates = 1; + } } sn->packets_since_sample[size_bin]++; @@ -450,22 +638,24 @@ done: */ void ath_rate_getxtxrates(struct ath_softc *sc, struct ath_node *an, - uint8_t rix0, uint8_t *rix, uint8_t *try) + uint8_t rix0, struct ath_rc_series *rc) { struct sample_node *sn = ATH_NODE_SAMPLE(an); const struct txschedule *sched = &sn->sched[rix0]; KASSERT(rix0 == sched->r0, ("rix0 (%x) != sched->r0 (%x)!\n", rix0, sched->r0)); -/* rix[0] = sched->r0; */ - rix[1] = sched->r1; - rix[2] = sched->r2; - rix[3] = sched->r3; + rc[0].flags = rc[1].flags = rc[2].flags = rc[3].flags = 0; + + rc[0].rix = sched->r0; + rc[1].rix = sched->r1; + rc[2].rix = sched->r2; + rc[3].rix = sched->r3; - try[0] = sched->t0; - try[1] = sched->t1; - try[2] = sched->t2; - try[3] = sched->t3; + rc[0].tries = sched->t0; + rc[1].tries = sched->t1; + rc[2].tries = sched->t2; + rc[3].tries = sched->t3; } void @@ -493,6 +683,71 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an, s3code, sched->t3); /* series 3 */ } +/* + * Update the EWMA percentage. + * + * This is a simple hack to track an EWMA based on the current + * rate scenario. For the rate codes which failed, this will + * record a 0% against it. For the rate code which succeeded, + * EWMA will record the nbad*100/nframes percentage against it. 
+ */ +static void +update_ewma_stats(struct ath_softc *sc, struct ath_node *an, + int frame_size, + int rix0, int tries0, + int rix1, int tries1, + int rix2, int tries2, + int rix3, int tries3, + int short_tries, int tries, int status, + int nframes, int nbad) +{ + struct sample_node *sn = ATH_NODE_SAMPLE(an); + struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc); + const int size_bin = size_to_bin(frame_size); + int tries_so_far; + int pct; + int rix = rix0; + + /* Calculate percentage based on current rate */ + if (nframes == 0) + nframes = nbad = 1; + pct = ((nframes - nbad) * 1000) / nframes; + + /* Figure out which rate index succeeded */ + tries_so_far = tries0; + + if (tries1 && tries_so_far < tries) { + tries_so_far += tries1; + rix = rix1; + /* XXX bump ewma pct */ + } + + if (tries2 && tries_so_far < tries) { + tries_so_far += tries2; + rix = rix2; + /* XXX bump ewma pct */ + } + + if (tries3 && tries_so_far < tries) { + rix = rix3; + /* XXX bump ewma pct */ + } + + /* rix is the successful rate, update EWMA for final rix */ + if (sn->stats[size_bin][rix].total_packets < + ssc->smoothing_minpackets) { + /* just average the first few packets */ + int a_pct = (sn->stats[size_bin][rix].packets_acked * 1000) / + (sn->stats[size_bin][rix].total_packets); + sn->stats[size_bin][rix].ewma_pct = a_pct; + } else { + /* use a ewma */ + sn->stats[size_bin][rix].ewma_pct = + ((sn->stats[size_bin][rix].ewma_pct * ssc->smoothing_rate) + + (pct * (100 - ssc->smoothing_rate))) / 100; + } +} + static void update_stats(struct ath_softc *sc, struct ath_node *an, int frame_size, @@ -500,10 +755,12 @@ update_stats(struct ath_softc *sc, struct ath_node *an, int rix1, int tries1, int rix2, int tries2, int rix3, int tries3, - int short_tries, int tries, int status) + int short_tries, int tries, int status, + int nframes, int nbad) { struct sample_node *sn = ATH_NODE_SAMPLE(an); struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc); + const HAL_RATE_TABLE *rt = sc->sc_currates; const int 
size_bin = size_to_bin(frame_size); const int size = bin_to_size(size_bin); int tt, tries_so_far; @@ -542,7 +799,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an, /* just average the first few packets */ int avg_tx = sn->stats[size_bin][rix0].average_tx_time; int packets = sn->stats[size_bin][rix0].total_packets; - sn->stats[size_bin][rix0].average_tx_time = (tt+(avg_tx*packets))/(packets+1); + sn->stats[size_bin][rix0].average_tx_time = (tt+(avg_tx*packets))/(packets+nframes); } else { /* use a ewma */ sn->stats[size_bin][rix0].average_tx_time = @@ -550,38 +807,50 @@ update_stats(struct ath_softc *sc, struct ath_node *an, (tt * (100 - ssc->smoothing_rate))) / 100; } - if (status != 0) { + /* + * XXX Don't mark the higher bit rates as also having failed; as this + * unfortunately stops those rates from being tasted when trying to + * TX. This happens with 11n aggregation. + */ + if (nframes == nbad) { +#if 0 int y; - sn->stats[size_bin][rix0].successive_failures++; +#endif + sn->stats[size_bin][rix0].successive_failures += nbad; +#if 0 for (y = size_bin+1; y < NUM_PACKET_SIZE_BINS; y++) { /* * Also say larger packets failed since we * assume if a small packet fails at a * bit-rate then a larger one will also. 
*/ - sn->stats[y][rix0].successive_failures++; + sn->stats[y][rix0].successive_failures += nbad; sn->stats[y][rix0].last_tx = ticks; sn->stats[y][rix0].tries += tries; - sn->stats[y][rix0].total_packets++; + sn->stats[y][rix0].total_packets += nframes; } +#endif } else { - sn->stats[size_bin][rix0].packets_acked++; + sn->stats[size_bin][rix0].packets_acked += (nframes - nbad); sn->stats[size_bin][rix0].successive_failures = 0; } sn->stats[size_bin][rix0].tries += tries; sn->stats[size_bin][rix0].last_tx = ticks; - sn->stats[size_bin][rix0].total_packets++; + sn->stats[size_bin][rix0].total_packets += nframes; if (rix0 == sn->current_sample_rix[size_bin]) { IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL, &an->an_node, -"%s: size %d %s sample rate %d tries (%d/%d) tt %d avg_tt (%d/%d)", +"%s: size %d %s sample rate %d %s tries (%d/%d) tt %d avg_tt (%d/%d) nfrm %d nbad %d", __func__, size, status ? "FAIL" : "OK", - rix0, short_tries, tries, tt, + dot11rate(rt, rix0), + dot11rate_label(rt, rix0), + short_tries, tries, tt, sn->stats[size_bin][rix0].average_tx_time, - sn->stats[size_bin][rix0].perfect_tx_time); + sn->stats[size_bin][rix0].perfect_tx_time, + nframes, nbad); sn->sample_tt[size_bin] = tt; sn->current_sample_rix[size_bin] = -1; } @@ -596,21 +865,21 @@ badrate(struct ifnet *ifp, int series, int hwrate, int tries, int status) void ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, - const struct ath_buf *bf) + const struct ath_rc_series *rc, const struct ath_tx_status *ts, + int frame_size, int nframes, int nbad) { struct ifnet *ifp = sc->sc_ifp; struct ieee80211com *ic = ifp->if_l2com; struct sample_node *sn = ATH_NODE_SAMPLE(an); - const struct ath_tx_status *ts = &bf->bf_status.ds_txstat; - const struct ath_desc *ds0 = &bf->bf_desc[0]; - int final_rix, short_tries, long_tries, frame_size; + int final_rix, short_tries, long_tries; const HAL_RATE_TABLE *rt = sc->sc_currates; + int status = ts->ts_status; int mrr; final_rix = 
rt->rateCodeToIndex[ts->ts_rate]; short_tries = ts->ts_shortretry; long_tries = ts->ts_longretry + 1; - frame_size = ds0->ds_ctl0 & 0x0fff; /* low-order 12 bits of ds_ctl0 */ + if (frame_size == 0) /* NB: should not happen */ frame_size = 1500; @@ -620,63 +889,73 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, "%s: size %d %s rate/try %d/%d no rates yet", __func__, bin_to_size(size_to_bin(frame_size)), - ts->ts_status ? "FAIL" : "OK", + status ? "FAIL" : "OK", short_tries, long_tries); return; } mrr = sc->sc_mrretry && !(ic->ic_flags & IEEE80211_F_USEPROT); if (!mrr || ts->ts_finaltsi == 0) { if (!IS_RATE_DEFINED(sn, final_rix)) { - badrate(ifp, 0, ts->ts_rate, long_tries, ts->ts_status); + badrate(ifp, 0, ts->ts_rate, long_tries, status); return; } /* * Only one rate was used; optimize work. */ IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL, - &an->an_node, "%s: size %d (%d bytes) %s rate/try %d %s/%d/%d", + &an->an_node, "%s: size %d (%d bytes) %s rate/try %d %s/%d/%d nframes/nbad [%d/%d]", __func__, bin_to_size(size_to_bin(frame_size)), frame_size, - ts->ts_status ? "FAIL" : "OK", - dot11rate(rt, final_rix), dot11rate_label(rt, final_rix), short_tries, long_tries); + status ? "FAIL" : "OK", + dot11rate(rt, final_rix), dot11rate_label(rt, final_rix), + short_tries, long_tries, nframes, nbad); update_stats(sc, an, frame_size, final_rix, long_tries, 0, 0, 0, 0, 0, 0, - short_tries, long_tries, ts->ts_status); + short_tries, long_tries, status, + nframes, nbad); + update_ewma_stats(sc, an, frame_size, + final_rix, long_tries, + 0, 0, + 0, 0, + 0, 0, + short_tries, long_tries, status, + nframes, nbad); + } else { - int hwrates[4], tries[4], rix[4]; int finalTSIdx = ts->ts_finaltsi; int i; /* * Process intermediate rates that failed. 
*/ - ath_hal_gettxcompletionrates(sc->sc_ah, ds0, hwrates, tries); - - for (i = 0; i < 4; i++) { - rix[i] = rt->rateCodeToIndex[hwrates[i]]; - } IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL, &an->an_node, -"%s: size %d (%d bytes) finaltsidx %d tries %d %s rate/try [%d %s/%d %d %s/%d %d %s/%d %d %s/%d]", +"%s: size %d (%d bytes) finaltsidx %d tries %d %s rate/try [%d %s/%d %d %s/%d %d %s/%d %d %s/%d] nframes/nbad [%d/%d]", __func__, bin_to_size(size_to_bin(frame_size)), frame_size, finalTSIdx, - long_tries, - ts->ts_status ? "FAIL" : "OK", - dot11rate(rt, rix[0]), dot11rate_label(rt, rix[0]), tries[0], - dot11rate(rt, rix[1]), dot11rate_label(rt, rix[1]), tries[1], - dot11rate(rt, rix[2]), dot11rate_label(rt, rix[2]), tries[2], - dot11rate(rt, rix[3]), dot11rate_label(rt, rix[3]), tries[3]); + long_tries, + status ? "FAIL" : "OK", + dot11rate(rt, rc[0].rix), + dot11rate_label(rt, rc[0].rix), rc[0].tries, + dot11rate(rt, rc[1].rix), + dot11rate_label(rt, rc[1].rix), rc[1].tries, + dot11rate(rt, rc[2].rix), + dot11rate_label(rt, rc[2].rix), rc[2].tries, + dot11rate(rt, rc[3].rix), + dot11rate_label(rt, rc[3].rix), rc[3].tries, + nframes, nbad); for (i = 0; i < 4; i++) { - if (tries[i] && !IS_RATE_DEFINED(sn, rix[i])) - badrate(ifp, 0, hwrates[i], tries[i], ts->ts_status); + if (rc[i].tries && !IS_RATE_DEFINED(sn, rc[i].rix)) + badrate(ifp, 0, rc[i].ratecode, rc[i].tries, + status); } /* @@ -686,48 +965,62 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, * sample higher rates 1 try at a time doing so * may unfairly penalize them. 
*/ - if (tries[0]) { - update_stats(sc, an, frame_size, - rix[0], tries[0], - rix[1], tries[1], - rix[2], tries[2], - rix[3], tries[3], - short_tries, long_tries, - long_tries > tries[0]); - long_tries -= tries[0]; + if (rc[0].tries) { + update_stats(sc, an, frame_size, + rc[0].rix, rc[0].tries, + rc[1].rix, rc[1].tries, + rc[2].rix, rc[2].tries, + rc[3].rix, rc[3].tries, + short_tries, long_tries, + long_tries > rc[0].tries, + nframes, nbad); + long_tries -= rc[0].tries; } - if (tries[1] && finalTSIdx > 0) { - update_stats(sc, an, frame_size, - rix[1], tries[1], - rix[2], tries[2], - rix[3], tries[3], - 0, 0, - short_tries, long_tries, - ts->ts_status); - long_tries -= tries[1]; + if (rc[1].tries && finalTSIdx > 0) { + update_stats(sc, an, frame_size, + rc[1].rix, rc[1].tries, + rc[2].rix, rc[2].tries, + rc[3].rix, rc[3].tries, + 0, 0, + short_tries, long_tries, + status, + nframes, nbad); + long_tries -= rc[1].tries; } - if (tries[2] && finalTSIdx > 1) { - update_stats(sc, an, frame_size, - rix[2], tries[2], - rix[3], tries[3], + if (rc[2].tries && finalTSIdx > 1) { + update_stats(sc, an, frame_size, + rc[2].rix, rc[2].tries, + rc[3].rix, rc[3].tries, 0, 0, 0, 0, - short_tries, long_tries, - ts->ts_status); - long_tries -= tries[2]; + short_tries, long_tries, + status, + nframes, nbad); + long_tries -= rc[2].tries; } - if (tries[3] && finalTSIdx > 2) { - update_stats(sc, an, frame_size, - rix[3], tries[3], + if (rc[3].tries && finalTSIdx > 2) { + update_stats(sc, an, frame_size, + rc[3].rix, rc[3].tries, 0, 0, 0, 0, 0, 0, - short_tries, long_tries, - ts->ts_status); + short_tries, long_tries, + status, + nframes, nbad); } + + update_ewma_stats(sc, an, frame_size, + rc[0].rix, rc[0].tries, + rc[1].rix, rc[1].tries, + rc[2].rix, rc[2].tries, + rc[3].rix, rc[3].tries, + short_tries, long_tries, + long_tries > rc[0].tries, + nframes, nbad); + } } @@ -849,6 +1142,7 @@ ath_rate_ctl_reset(struct ath_softc *sc, struct ieee80211_node *ni) sn->stats[y][rix].total_packets = 
0; sn->stats[y][rix].packets_acked = 0; sn->stats[y][rix].last_tx = 0; + sn->stats[y][rix].ewma_pct = 0; sn->stats[y][rix].perfect_tx_time = calc_usecs_unicast_packet(sc, size, rix, 0, 0, @@ -886,18 +1180,24 @@ sample_stats(void *arg, struct ieee80211_node *ni) uint32_t mask; int rix, y; - printf("\n[%s] refcnt %d static_rix %d ratemask 0x%x\n", + printf("\n[%s] refcnt %d static_rix (%d %s) ratemask 0x%x\n", ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni), - sn->static_rix, sn->ratemask); + dot11rate(rt, sn->static_rix), + dot11rate_label(rt, sn->static_rix), + sn->ratemask); for (y = 0; y < NUM_PACKET_SIZE_BINS; y++) { printf("[%4u] cur rix %d (%d %s) since switch: packets %d ticks %u\n", bin_to_size(y), sn->current_rix[y], dot11rate(rt, sn->current_rix[y]), dot11rate_label(rt, sn->current_rix[y]), sn->packets_since_switch[y], sn->ticks_since_switch[y]); - printf("[%4u] last sample %d cur sample %d packets sent %d\n", - bin_to_size(y), sn->last_sample_rix[y], - sn->current_sample_rix[y], sn->packets_sent[y]); + printf("[%4u] last sample (%d %s) cur sample (%d %s) packets sent %d\n", + bin_to_size(y), + dot11rate(rt, sn->last_sample_rix[y]), + dot11rate_label(rt, sn->last_sample_rix[y]), + dot11rate(rt, sn->current_sample_rix[y]), + dot11rate_label(rt, sn->current_sample_rix[y]), + sn->packets_sent[y]); printf("[%4u] packets since sample %d sample tt %u\n", bin_to_size(y), sn->packets_since_sample[y], sn->sample_tt[y]); @@ -908,13 +1208,15 @@ sample_stats(void *arg, struct ieee80211_node *ni) for (y = 0; y < NUM_PACKET_SIZE_BINS; y++) { if (sn->stats[y][rix].total_packets == 0) continue; - printf("[%2u %s:%4u] %8ju:%-8ju (%3d%%) T %8ju F %4d avg %5u last %u\n", + printf("[%2u %s:%4u] %8ju:%-8ju (%3d%%) (EWMA %3d.%1d%%) T %8ju F %4d avg %5u last %u\n", dot11rate(rt, rix), dot11rate_label(rt, rix), bin_to_size(y), (uintmax_t) sn->stats[y][rix].total_packets, (uintmax_t) sn->stats[y][rix].packets_acked, (int) ((sn->stats[y][rix].packets_acked * 100ULL) / 
sn->stats[y][rix].total_packets), + sn->stats[y][rix].ewma_pct / 10, + sn->stats[y][rix].ewma_pct % 10, (uintmax_t) sn->stats[y][rix].tries, sn->stats[y][rix].successive_failures, sn->stats[y][rix].average_tx_time, diff --git a/sys/dev/ath/ath_rate/sample/sample.h b/sys/dev/ath/ath_rate/sample/sample.h index 1ec13b4..805ae46 100644 --- a/sys/dev/ath/ath_rate/sample/sample.h +++ b/sys/dev/ath/ath_rate/sample/sample.h @@ -51,6 +51,7 @@ struct sample_softc { int max_successive_failures; int stale_failure_timeout; /* how long to honor max_successive_failures */ int min_switch; /* min time between rate changes */ + int min_good_pct; /* min good percentage for a rate to be considered */ }; #define ATH_SOFTC_SAMPLE(sc) ((struct sample_softc *)sc->sc_rc) @@ -60,6 +61,7 @@ struct rate_stats { uint64_t tries; uint64_t total_packets; /* pkts total since assoc */ uint64_t packets_acked; /* pkts acked since assoc */ + int ewma_pct; /* EWMA percentage */ unsigned perfect_tx_time; /* transmit time for 0 retries */ int last_tx; }; diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index ef3db94..210b965 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -723,6 +723,19 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) ic->ic_scan_end = ath_scan_end; ic->ic_set_channel = ath_set_channel; + /* 802.11n specific - but just override anyway */ + sc->sc_addba_request = ic->ic_addba_request; + sc->sc_addba_response = ic->ic_addba_response; + sc->sc_addba_stop = ic->ic_addba_stop; + sc->sc_bar_response = ic->ic_bar_response; + sc->sc_addba_response_timeout = ic->ic_addba_response_timeout; + + ic->ic_addba_request = ath_addba_request; + ic->ic_addba_response = ath_addba_response; + ic->ic_addba_response_timeout = ath_addba_response_timeout; + ic->ic_addba_stop = ath_addba_stop; + ic->ic_bar_response = ath_bar_response; + ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), ATH_TX_RADIOTAP_PRESENT, @@ -3343,6 +3356,9 @@ ath_node_alloc(struct 
ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) device_get_nameunit(sc->sc_dev), an); mtx_init(&an->an_mtx, an->an_name, NULL, MTX_DEF); + /* XXX setup ath_tid */ + ath_tx_tid_init(sc, an); + DPRINTF(sc, ATH_DEBUG_NODE, "%s: an %p\n", __func__, an); return &an->an_node; } @@ -3354,6 +3370,7 @@ ath_node_cleanup(struct ieee80211_node *ni) struct ath_softc *sc = ic->ic_ifp->if_softc; /* Cleanup ath_tid, free unused bufs, unlink bufs in TXQ */ + ath_tx_node_flush(sc, ATH_NODE(ni)); ath_rate_node_cleanup(sc, ATH_NODE(ni)); sc->sc_node_cleanup(ni); } @@ -4364,6 +4381,29 @@ ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail) } /* + * Update rate control with the given completion status. + */ +void +ath_tx_update_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_rc_series *rc, struct ath_tx_status *ts, int frmlen, + int nframes, int nbad) +{ + struct ath_node *an; + + /* Only for unicast frames */ + if (ni == NULL) + return; + + an = ATH_NODE(ni); + + if ((ts->ts_status & HAL_TXERR_FILT) == 0) { + ATH_NODE_LOCK(an); + ath_rate_tx_complete(sc, an, rc, ts, frmlen, nframes, nbad); + ATH_NODE_UNLOCK(an); + } +} + +/* * Update the busy status of the last frame on the free list. * When doing TDMA, the busy flag tracks whether the hardware * currently points to this buffer or not, and thus gated DMA @@ -4396,6 +4436,8 @@ ath_tx_update_busy(struct ath_softc *sc) /* * Process completed xmit descriptors from the specified queue. + * Kick the packet scheduler if needed. This can occur from this + * particular task. 
*/ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) @@ -4405,6 +4447,7 @@ ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) struct ath_desc *ds; struct ath_tx_status *ts; struct ieee80211_node *ni; + struct ath_node *an; int nacked; HAL_STATUS status; @@ -4471,6 +4514,7 @@ ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) /* If unicast frame, update general statistics */ if (ni != NULL) { + an = ATH_NODE(ni); /* update statistics */ ath_tx_update_stats(sc, ts, bf); } @@ -4490,7 +4534,10 @@ ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) * XXX assume this isn't an aggregate * frame. */ - ath_rate_tx_complete(sc, ATH_NODE(ni), bf); + ath_tx_update_ratectrl(sc, ni, + bf->bf_state.bfs_rc, ts, + bf->bf_state.bfs_pktlen, 1, + (ts->ts_status == 0 ? 0 : 1)); } ath_tx_default_comp(sc, bf, 0); } else @@ -4503,6 +4550,14 @@ ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq, int dosched) if (txq->axq_depth <= 1) ieee80211_ff_flush(ic, txq->axq_ac); #endif + + /* Kick the TXQ scheduler */ + if (dosched) { + ATH_TXQ_LOCK(txq); + ath_txq_sched(sc, txq); + ATH_TXQ_UNLOCK(txq); + } + return nacked; } @@ -4736,6 +4791,11 @@ ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq) ath_tx_default_comp(sc, bf, 1); } + /* + * Drain software queued frames which are on + * active TIDs. 
+ */ + ath_tx_txq_drain(sc, txq); } static void diff --git a/sys/dev/ath/if_ath_misc.h b/sys/dev/ath/if_ath_misc.h index 6f2b2e6..c48590e 100644 --- a/sys/dev/ath/if_ath_misc.h +++ b/sys/dev/ath/if_ath_misc.h @@ -60,6 +60,9 @@ extern int ath_reset(struct ifnet *, ATH_RESET_TYPE); extern void ath_tx_draintxq(struct ath_softc *sc, struct ath_txq *txq); extern void ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail); +extern void ath_tx_update_ratectrl(struct ath_softc *sc, + struct ieee80211_node *ni, struct ath_rc_series *rc, + struct ath_tx_status *ts, int frmlen, int nframes, int nbad); extern void ath_tx_freebuf(struct ath_softc *sc, struct ath_buf *bf, int status); diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c index 8de688a..3793c72 100644 --- a/sys/dev/ath/if_ath_tx.c +++ b/sys/dev/ath/if_ath_tx.c @@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$"); #ifdef IEEE80211_SUPPORT_TDMA #include <net80211/ieee80211_tdma.h> #endif +#include <net80211/ieee80211_ht.h> #include <net/bpf.h> @@ -100,6 +101,20 @@ __FBSDID("$FreeBSD$"); #include <dev/ath/if_ath_tx_ht.h> /* + * How many retries to perform in software + */ +#define SWMAX_RETRIES 10 + +static int ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an, + int tid); +static int ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an, + int tid); +static ieee80211_seq ath_tx_tid_seqno_assign(struct ath_softc *sc, + struct ieee80211_node *ni, struct ath_buf *bf, struct mbuf *m0); +static int ath_tx_action_frame_override_queue(struct ath_softc *sc, + struct ieee80211_node *ni, struct mbuf *m0, int *tid); + +/* * Whether to use the 11n rate scenario functions or not */ static inline int @@ -108,6 +123,56 @@ ath_tx_is_11n(struct ath_softc *sc) return (sc->sc_ah->ah_magic == 0x20065416); } +/* + * Obtain the current TID from the given frame. + * + * Non-QoS frames need to go into TID 16 (IEEE80211_NONQOS_TID.) 
+ * This has implications for which AC/priority the packet is placed + * in. + */ +static int +ath_tx_gettid(struct ath_softc *sc, const struct mbuf *m0) +{ + const struct ieee80211_frame *wh; + int pri = M_WME_GETAC(m0); + + wh = mtod(m0, const struct ieee80211_frame *); + if (! IEEE80211_QOS_HAS_SEQ(wh)) + return IEEE80211_NONQOS_TID; + else + return WME_AC_TO_TID(pri); +} + +/* + * Determine what the correct AC queue for the given frame + * should be. + * + * This code assumes that the TIDs map consistently to + * the underlying hardware (or software) ath_txq. + * Since the sender may try to set an AC which is + * arbitrary, non-QoS TIDs may end up being put on + * completely different ACs. There's no way to put a + * TID into multiple ath_txq's for scheduling, so + * for now we override the AC/TXQ selection and set + * non-QOS TID frames into the BE queue. + * + * This may be completely incorrect - specifically, + * some management frames may end up out of order + * compared to the QoS traffic they're controlling. + * I'll look into this later. + */ +static int +ath_tx_getac(struct ath_softc *sc, const struct mbuf *m0) +{ + const struct ieee80211_frame *wh; + int pri = M_WME_GETAC(m0); + wh = mtod(m0, const struct ieee80211_frame *); + if (IEEE80211_QOS_HAS_SEQ(wh)) + return pri; + + return WME_AC_BE; +} + void ath_txfrag_cleanup(struct ath_softc *sc, ath_bufhead *frags, struct ieee80211_node *ni) @@ -229,7 +294,7 @@ ath_tx_dmasetup(struct ath_softc *sc, struct ath_buf *bf, struct mbuf *m0) * Chain together segments+descriptors for a non-11n frame. */ static void -ath_tx_chaindesclist(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) +ath_tx_chaindesclist(struct ath_softc *sc, struct ath_buf *bf) { struct ath_hal *ah = sc->sc_ah; struct ath_desc *ds, *ds0; @@ -259,13 +324,168 @@ ath_tx_chaindesclist(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf * } } +/* + * Fill in the descriptor list for a aggregate subframe. 
+ * + * The subframe is returned with the ds_link field in the last subframe + * pointing to 0. + */ static void -ath_tx_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) +ath_tx_chaindesclist_subframe(struct ath_softc *sc, struct ath_buf *bf) { struct ath_hal *ah = sc->sc_ah; + struct ath_desc *ds, *ds0; + int i; - /* Fill in the details in the descriptor list */ - ath_tx_chaindesclist(sc, txq, bf); + ds0 = ds = bf->bf_desc; + + /* + * There's no need to call ath_hal_setupfirsttxdesc here; + * That's only going to occur for the first frame in an aggregate. + */ + for (i = 0; i < bf->bf_nseg; i++, ds++) { + ds->ds_data = bf->bf_segs[i].ds_addr; + if (i == bf->bf_nseg - 1) + ds->ds_link = 0; + else + ds->ds_link = bf->bf_daddr + sizeof(*ds) * (i + 1); + + /* + * This performs the setup for an aggregate frame. + * This includes enabling the aggregate flags if needed. + */ + ath_hal_chaintxdesc(ah, ds, + bf->bf_state.bfs_pktlen, + bf->bf_state.bfs_hdrlen, + HAL_PKT_TYPE_AMPDU, /* forces aggregate bits to be set */ + bf->bf_state.bfs_keyix, + 0, /* cipher, calculated from keyix */ + bf->bf_state.bfs_ndelim, + bf->bf_segs[i].ds_len, /* segment length */ + i == 0, /* first segment */ + i == bf->bf_nseg - 1 /* last segment */ + ); + + DPRINTF(sc, ATH_DEBUG_XMIT, + "%s: %d: %08x %08x %08x %08x %08x %08x\n", + __func__, i, ds->ds_link, ds->ds_data, + ds->ds_ctl0, ds->ds_ctl1, ds->ds_hw[0], ds->ds_hw[1]); + bf->bf_lastds = ds; + } +} + +/* + * Setup segments+descriptors for an 11n aggregate. + * bf_first is the first buffer in the aggregate. + * The descriptor list must already been linked together using + * bf->bf_next. + */ +static void +ath_tx_setds_11n(struct ath_softc *sc, struct ath_buf *bf_first) +{ + struct ath_buf *bf, *bf_prev = NULL; + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: nframes=%d, al=%d\n", + __func__, bf_first->bf_state.bfs_nframes, + bf_first->bf_state.bfs_al); + + /* + * Setup all descriptors of all subframes. 
+ */ + bf = bf_first; + while (bf != NULL) { + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: bf=%p, nseg=%d, pktlen=%d, seqno=%d\n", + __func__, bf, bf->bf_nseg, bf->bf_state.bfs_pktlen, + SEQNO(bf->bf_state.bfs_seqno)); + + /* Sub-frame setup */ + ath_tx_chaindesclist_subframe(sc, bf); + + /* + * Link the last descriptor of the previous frame + * to the beginning descriptor of this frame. + */ + if (bf_prev != NULL) + bf_prev->bf_lastds->ds_link = bf->bf_daddr; + + /* Save a copy so we can link the next descriptor in */ + bf_prev = bf; + bf = bf->bf_next; + } + + /* + * Setup first descriptor of first frame. + * chaintxdesc() overwrites the descriptor entries; + * setupfirsttxdesc() merges in things. + * Otherwise various fields aren't set correctly (eg flags). + */ + ath_hal_setupfirsttxdesc(sc->sc_ah, + bf_first->bf_desc, + bf_first->bf_state.bfs_al, + bf_first->bf_state.bfs_flags | HAL_TXDESC_INTREQ, + bf_first->bf_state.bfs_txpower, + bf_first->bf_state.bfs_txrate0, + bf_first->bf_state.bfs_try0, + bf_first->bf_state.bfs_txantenna, + bf_first->bf_state.bfs_ctsrate, + bf_first->bf_state.bfs_ctsduration); + + /* + * Setup the last descriptor in the list. + * bf_prev points to the last; bf is NULL here. + */ + ath_hal_setuplasttxdesc(sc->sc_ah, bf_prev->bf_desc, bf_first->bf_desc); + + /* + * Set the first descriptor bf_lastds field to point to + * the last descriptor in the last subframe, that's where + * the status update will occur. + */ + bf_first->bf_lastds = bf_prev->bf_lastds; + + /* + * And bf_last in the first descriptor points to the end of + * the aggregate list. 
+ */ + bf_first->bf_last = bf_prev; + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: end\n", __func__); +} + +static void +ath_tx_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq, + struct ath_buf *bf) +{ + ATH_TXQ_LOCK_ASSERT(txq); + KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, + ("%s: busy status 0x%x", __func__, bf->bf_flags)); + if (txq->axq_link != NULL) { + struct ath_buf *last = ATH_TXQ_LAST(txq, axq_q_s); + struct ieee80211_frame *wh; + + /* mark previous frame */ + wh = mtod(last->bf_m, struct ieee80211_frame *); + wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA; + bus_dmamap_sync(sc->sc_dmat, last->bf_dmamap, + BUS_DMASYNC_PREWRITE); + + /* link descriptor */ + *txq->axq_link = bf->bf_daddr; + } + ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); + txq->axq_link = &bf->bf_lastds->ds_link; +} + + + +/* + * Hand-off packet to a hardware queue. + */ +static void +ath_tx_handoff_hw(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) +{ + struct ath_hal *ah = sc->sc_ah; /* * Insert the frame on the outbound list and pass it on @@ -275,10 +495,14 @@ ath_tx_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) * the SWBA handler since frames only go out on DTIM and * to avoid possible races. 
*/ - ATH_TXQ_LOCK(txq); + ATH_TXQ_LOCK_ASSERT(txq); KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0, - ("busy status 0x%x", bf->bf_flags)); - if (txq->axq_qnum != ATH_TXQ_SWQ) { + ("%s: busy status 0x%x", __func__, bf->bf_flags)); + KASSERT(txq->axq_qnum != ATH_TXQ_SWQ, + ("ath_tx_handoff_hw called for mcast queue")); + + /* For now, so not to generate whitespace diffs */ + if (1) { #ifdef IEEE80211_SUPPORT_TDMA int qbusy; @@ -354,24 +578,48 @@ ath_tx_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) txq->axq_aggr_depth++; txq->axq_link = &bf->bf_lastds->ds_link; ath_hal_txstart(ah, txq->axq_qnum); - } else { - if (txq->axq_link != NULL) { - struct ath_buf *last = ATH_TXQ_LAST(txq, axq_q_s); - struct ieee80211_frame *wh; + } +} + +/* + * Restart TX DMA for the given TXQ. + * + * This must be called whether the queue is empty or not. + */ +void +ath_txq_restart_dma(struct ath_softc *sc, struct ath_txq *txq) +{ + struct ath_hal *ah = sc->sc_ah; + struct ath_buf *bf; - /* mark previous frame */ - wh = mtod(last->bf_m, struct ieee80211_frame *); - wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA; - bus_dmamap_sync(sc->sc_dmat, last->bf_dmamap, - BUS_DMASYNC_PREWRITE); + ATH_TXQ_LOCK_ASSERT(txq); - /* link descriptor */ - *txq->axq_link = bf->bf_daddr; - } - ATH_TXQ_INSERT_TAIL(txq, bf, bf_list); - txq->axq_link = &bf->bf_desc[bf->bf_nseg - 1].ds_link; - } - ATH_TXQ_UNLOCK(txq); + /* This is always going to be cleared, empty or not */ + txq->axq_flags &= ~ATH_TXQ_PUTPENDING; + + bf = TAILQ_FIRST(&txq->axq_q); + if (bf == NULL) + return; + + ath_hal_puttxbuf(ah, txq->axq_qnum, bf->bf_daddr); + txq->axq_link = &bf->bf_lastds->ds_link; + ath_hal_txstart(ah, txq->axq_qnum); +} + +/* + * Hand off a packet to the hardware (or mcast queue.) + * + * The relevant hardware txq should be locked. 
+ */ +static void +ath_tx_handoff(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf) +{ + ATH_TXQ_LOCK_ASSERT(txq); + + if (txq->axq_qnum == ATH_TXQ_SWQ) + ath_tx_handoff_mcast(sc, txq, bf); + else + ath_tx_handoff_hw(sc, txq, bf); } static int @@ -427,7 +675,7 @@ ath_tx_tag_crypto(struct ath_softc *sc, struct ieee80211_node *ni, static uint8_t ath_tx_get_rtscts_rate(struct ath_hal *ah, const HAL_RATE_TABLE *rt, - int rix, int cix, int shortPreamble) + int cix, int shortPreamble) { uint8_t ctsrate; @@ -447,7 +695,6 @@ ath_tx_get_rtscts_rate(struct ath_hal *ah, const HAL_RATE_TABLE *rt, return ctsrate; } - /* * Calculate the RTS/CTS duration for legacy frames. */ @@ -493,9 +740,238 @@ ath_tx_calc_ctsduration(struct ath_hal *ah, int rix, int cix, return ctsduration; } -int -ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf, - struct mbuf *m0) +/* + * Update the given ath_buf with updated rts/cts setup and duration + * values. + * + * To support rate lookups for each software retry, the rts/cts rate + * and cts duration must be re-calculated. + * + * This function assumes the RTS/CTS flags have been set as needed; + * mrr has been disabled; and the rate control lookup has been done. + * + * XXX TODO: MRR need only be disabled for the pre-11n NICs. + * XXX The 11n NICs support per-rate RTS/CTS configuration. + */ +static void +ath_tx_set_rtscts(struct ath_softc *sc, struct ath_buf *bf) +{ + uint16_t ctsduration = 0; + uint8_t ctsrate = 0; + uint8_t rix = bf->bf_state.bfs_rc[0].rix; + uint8_t cix = 0; + const HAL_RATE_TABLE *rt = sc->sc_currates; + + /* + * No RTS/CTS enabled? Don't bother. + */ + if ((bf->bf_state.bfs_flags & + (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) == 0) { + /* XXX is this really needed? */ + bf->bf_state.bfs_ctsrate = 0; + bf->bf_state.bfs_ctsduration = 0; + return; + } + + /* + * If protection is enabled, use the protection rix control + * rate. Otherwise use the rate0 control rate. 
+ */ + if (bf->bf_state.bfs_doprot) + rix = sc->sc_protrix; + else + rix = bf->bf_state.bfs_rc[0].rix; + + /* + * If the raw path has hard-coded ctsrate0 to something, + * use it. + */ + if (bf->bf_state.bfs_ctsrate0 != 0) + cix = ath_tx_findrix(sc, bf->bf_state.bfs_ctsrate0); + else + /* Control rate from above */ + cix = rt->info[rix].controlRate; + + /* Calculate the rtscts rate for the given cix */ + ctsrate = ath_tx_get_rtscts_rate(sc->sc_ah, rt, cix, + bf->bf_state.bfs_shpream); + + /* The 11n chipsets do ctsduration calculations for you */ + if (! ath_tx_is_11n(sc)) + ctsduration = ath_tx_calc_ctsduration(sc->sc_ah, rix, cix, + bf->bf_state.bfs_shpream, bf->bf_state.bfs_pktlen, + rt, bf->bf_state.bfs_flags); + + /* Squirrel away in ath_buf */ + bf->bf_state.bfs_ctsrate = ctsrate; + bf->bf_state.bfs_ctsduration = ctsduration; + + /* + * Must disable multi-rate retry when using RTS/CTS. + * XXX TODO: only for pre-11n NICs. + */ + bf->bf_state.bfs_ismrr = 0; + bf->bf_state.bfs_try0 = + bf->bf_state.bfs_rc[0].tries = ATH_TXMGTTRY; /* XXX ew */ +} + +/* + * Setup the descriptor chain for a normal or fast-frame + * frame. + */ +static void +ath_tx_setds(struct ath_softc *sc, struct ath_buf *bf) +{ + struct ath_desc *ds = bf->bf_desc; + struct ath_hal *ah = sc->sc_ah; + + ath_hal_setuptxdesc(ah, ds + , bf->bf_state.bfs_pktlen /* packet length */ + , bf->bf_state.bfs_hdrlen /* header length */ + , bf->bf_state.bfs_atype /* Atheros packet type */ + , bf->bf_state.bfs_txpower /* txpower */ + , bf->bf_state.bfs_txrate0 + , bf->bf_state.bfs_try0 /* series 0 rate/tries */ + , bf->bf_state.bfs_keyix /* key cache index */ + , bf->bf_state.bfs_txantenna /* antenna mode */ + , bf->bf_state.bfs_flags /* flags */ + , bf->bf_state.bfs_ctsrate /* rts/cts rate */ + , bf->bf_state.bfs_ctsduration /* rts/cts duration */ + ); + + /* + * This will be overriden when the descriptor chain is written. 
+ */ + bf->bf_lastds = ds; + bf->bf_last = bf; + + /* XXX TODO: Setup descriptor chain */ +} + +/* + * Do a rate lookup. + * + * This performs a rate lookup for the given ath_buf only if it's required. + * Non-data frames and raw frames don't require it. + * + * This populates the primary and MRR entries; MRR values are + * then disabled later on if something requires it (eg RTS/CTS on + * pre-11n chipsets. + * + * This needs to be done before the RTS/CTS fields are calculated + * as they may depend upon the rate chosen. + */ +static void +ath_tx_do_ratelookup(struct ath_softc *sc, struct ath_buf *bf) +{ + uint8_t rate, rix; + int try0; + + if (! bf->bf_state.bfs_doratelookup) + return; + + /* Get rid of any previous state */ + bzero(bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc)); + + ATH_NODE_LOCK(ATH_NODE(bf->bf_node)); + ath_rate_findrate(sc, ATH_NODE(bf->bf_node), bf->bf_state.bfs_shpream, + bf->bf_state.bfs_pktlen, &rix, &try0, &rate); + + /* In case MRR is disabled, make sure rc[0] is setup correctly */ + bf->bf_state.bfs_rc[0].rix = rix; + bf->bf_state.bfs_rc[0].ratecode = rate; + bf->bf_state.bfs_rc[0].tries = try0; + + if (bf->bf_state.bfs_ismrr && try0 != ATH_TXMAXTRY) + ath_rate_getxtxrates(sc, ATH_NODE(bf->bf_node), rix, + bf->bf_state.bfs_rc); + ATH_NODE_UNLOCK(ATH_NODE(bf->bf_node)); + + sc->sc_txrix = rix; /* for LED blinking */ + sc->sc_lastdatarix = rix; /* for fast frames */ + bf->bf_state.bfs_try0 = try0; + bf->bf_state.bfs_txrate0 = rate; +} + +/* + * Set the rate control fields in the given descriptor based on + * the bf_state fields and node state. + * + * The bfs fields should already be set with the relevant rate + * control information, including whether MRR is to be enabled. + * + * Since the FreeBSD HAL currently sets up the first TX rate + * in ath_hal_setuptxdesc(), this will setup the MRR + * conditionally for the pre-11n chips, and call ath_buf_set_rate + * unconditionally for 11n chips. 
These require the 11n rate + * scenario to be set if MCS rates are enabled, so it's easier + * to just always call it. The caller can then only set rates 2, 3 + * and 4 if multi-rate retry is needed. + */ +static void +ath_tx_set_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_buf *bf) +{ + struct ath_rc_series *rc = bf->bf_state.bfs_rc; + + /* If mrr is disabled, blank tries 1, 2, 3 */ + if (! bf->bf_state.bfs_ismrr) + rc[1].tries = rc[2].tries = rc[3].tries = 0; + + /* + * Always call - that way a retried descriptor will + * have the MRR fields overwritten. + * + * XXX TODO: see if this is really needed - setting up + * the first descriptor should set the MRR fields to 0 + * for us anyway. + */ + if (ath_tx_is_11n(sc)) { + ath_buf_set_rate(sc, ni, bf); + } else { + ath_hal_setupxtxdesc(sc->sc_ah, bf->bf_desc + , rc[1].ratecode, rc[1].tries + , rc[2].ratecode, rc[2].tries + , rc[3].ratecode, rc[3].tries + ); + } +} + +/* + * Transmit the given frame to the hardware. + * + * The frame must already be setup; rate control must already have + * been done. + * + * XXX since the TXQ lock is being held here (and I dislike holding + * it for this long when not doing software aggregation), later on + * break this function into "setup_normal" and "xmit_normal". The + * lock only needs to be held for the ath_tx_handoff call. 
+ */ +static void +ath_tx_xmit_normal(struct ath_softc *sc, struct ath_txq *txq, + struct ath_buf *bf) +{ + + ATH_TXQ_LOCK_ASSERT(txq); + + /* Setup the descriptor before handoff */ + ath_tx_do_ratelookup(sc, bf); + ath_tx_rate_fill_rcflags(sc, bf); + ath_tx_set_rtscts(sc, bf); + ath_tx_setds(sc, bf); + ath_tx_set_ratectrl(sc, bf->bf_node, bf); + ath_tx_chaindesclist(sc, bf); + + /* Hand off to hardware */ + ath_tx_handoff(sc, txq, bf); +} + + + +static int +ath_tx_normal_setup(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_buf *bf, struct mbuf *m0) { struct ieee80211vap *vap = ni->ni_vap; struct ath_vap *avp = ATH_VAP(vap); @@ -504,22 +980,17 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf struct ieee80211com *ic = ifp->if_l2com; const struct chanAccParams *cap = &ic->ic_wme.wme_chanParams; int error, iswep, ismcast, isfrag, ismrr; - int keyix, hdrlen, pktlen, try0; - u_int8_t rix, txrate, ctsrate; - u_int8_t cix = 0xff; /* NB: silence compiler */ + int keyix, hdrlen, pktlen, try0 = 0; + u_int8_t rix = 0, txrate = 0; struct ath_desc *ds; struct ath_txq *txq; struct ieee80211_frame *wh; - u_int subtype, flags, ctsduration; + u_int subtype, flags; HAL_PKT_TYPE atype; const HAL_RATE_TABLE *rt; HAL_BOOL shortPreamble; struct ath_node *an; u_int pri; - uint8_t try[4], rate[4]; - - bzero(try, sizeof(try)); - bzero(rate, sizeof(rate)); wh = mtod(m0, struct ieee80211_frame *); iswep = wh->i_fc[1] & IEEE80211_FC1_WEP; @@ -533,7 +1004,8 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf pktlen = m0->m_pkthdr.len - (hdrlen & 3); /* Handle encryption twiddling if needed */ - if (! ath_tx_tag_crypto(sc, ni, m0, iswep, isfrag, &hdrlen, &pktlen, &keyix)) { + if (! 
ath_tx_tag_crypto(sc, ni, m0, iswep, isfrag, &hdrlen, + &pktlen, &keyix)) { ath_freetx(m0); return EIO; } @@ -629,12 +1101,12 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf txrate |= rt->info[rix].shortPreamble; try0 = ATH_TXMAXTRY; /* XXX?too many? */ } else { - ath_rate_findrate(sc, an, shortPreamble, pktlen, - &rix, &try0, &txrate); - sc->sc_txrix = rix; /* for LED blinking */ - sc->sc_lastdatarix = rix; /* for fast frames */ - if (try0 != ATH_TXMAXTRY) - ismrr = 1; + /* + * Do rate lookup on each TX, rather than using + * the hard-coded TX information decided here. + */ + ismrr = 1; + bf->bf_state.bfs_doratelookup = 1; } if (cap->cap_wmeParams[pri].wmep_noackPolicy) flags |= HAL_TXDESC_NOACK; @@ -665,7 +1137,6 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf } else if (pktlen > vap->iv_rtsthreshold && (ni->ni_ath_flags & IEEE80211_NODE_FF) == 0) { flags |= HAL_TXDESC_RTSENA; /* RTS based on frame length */ - cix = rt->info[rix].controlRate; sc->sc_stats.ast_tx_rts++; } if (flags & HAL_TXDESC_NOACK) /* NB: avoid double counting */ @@ -688,22 +1159,20 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf if ((ic->ic_flags & IEEE80211_F_USEPROT) && rt->info[rix].phy == IEEE80211_T_OFDM && (flags & HAL_TXDESC_NOACK) == 0) { + bf->bf_state.bfs_doprot = 1; /* XXX fragments must use CCK rates w/ protection */ - if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) + if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) { flags |= HAL_TXDESC_RTSENA; - else if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) + } else if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) { flags |= HAL_TXDESC_CTSENA; - if (isfrag) { - /* - * For frags it would be desirable to use the - * highest CCK rate for RTS/CTS. But stations - * farther away may detect it at a lower CCK rate - * so use the configured protection rate instead - * (for now). 
- */ - cix = rt->info[sc->sc_protrix].controlRate; - } else - cix = rt->info[sc->sc_protrix].controlRate; + } + /* + * For frags it would be desirable to use the + * highest CCK rate for RTS/CTS. But stations + * farther away may detect it at a lower CCK rate + * so use the configured protection rate instead + * (for now). + */ sc->sc_stats.ast_tx_protect++; } @@ -761,23 +1230,28 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf } /* - * Calculate RTS/CTS rate and duration if needed. + * Determine if a tx interrupt should be generated for + * this descriptor. We take a tx interrupt to reap + * descriptors when the h/w hits an EOL condition or + * when the descriptor is specifically marked to generate + * an interrupt. We periodically mark descriptors in this + * way to insure timely replenishing of the supply needed + * for sending frames. Defering interrupts reduces system + * load and potentially allows more concurrent work to be + * done but if done to aggressively can cause senders to + * backup. + * + * NB: use >= to deal with sc_txintrperiod changing + * dynamically through sysctl. */ - ctsduration = 0; - if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA)) { - ctsrate = ath_tx_get_rtscts_rate(ah, rt, rix, cix, shortPreamble); + if (flags & HAL_TXDESC_INTREQ) { + txq->axq_intrcnt = 0; + } else if (++txq->axq_intrcnt >= sc->sc_txintrperiod) { + flags |= HAL_TXDESC_INTREQ; + txq->axq_intrcnt = 0; + } - /* The 11n chipsets do ctsduration calculations for you */ - if (! ath_tx_is_11n(sc)) - ctsduration = ath_tx_calc_ctsduration(ah, rix, cix, shortPreamble, - pktlen, rt, flags); - /* - * Must disable multi-rate retry when using RTS/CTS. 
- */ - ismrr = 0; - try0 = ATH_TXMGTTRY; /* XXX */ - } else - ctsrate = 0; + /* This point forward is actual TX bits */ /* * At this point we are committed to sending the frame @@ -806,71 +1280,187 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf ieee80211_radiotap_tx(vap, m0); } + /* Blank the legacy rate array */ + bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc)); + /* - * Determine if a tx interrupt should be generated for - * this descriptor. We take a tx interrupt to reap - * descriptors when the h/w hits an EOL condition or - * when the descriptor is specifically marked to generate - * an interrupt. We periodically mark descriptors in this - * way to insure timely replenishing of the supply needed - * for sending frames. Defering interrupts reduces system - * load and potentially allows more concurrent work to be - * done but if done to aggressively can cause senders to - * backup. + * ath_buf_set_rate needs at least one rate/try to setup + * the rate scenario. + */ + bf->bf_state.bfs_rc[0].rix = rix; + bf->bf_state.bfs_rc[0].tries = try0; + bf->bf_state.bfs_rc[0].ratecode = txrate; + + /* Store the decided rate index values away */ + bf->bf_state.bfs_pktlen = pktlen; + bf->bf_state.bfs_hdrlen = hdrlen; + bf->bf_state.bfs_atype = atype; + bf->bf_state.bfs_txpower = ni->ni_txpower; + bf->bf_state.bfs_txrate0 = txrate; + bf->bf_state.bfs_try0 = try0; + bf->bf_state.bfs_keyix = keyix; + bf->bf_state.bfs_txantenna = sc->sc_txantenna; + bf->bf_state.bfs_flags = flags; + bf->bf_txflags = flags; + bf->bf_state.bfs_shpream = shortPreamble; + + /* XXX this should be done in ath_tx_setrate() */ + bf->bf_state.bfs_ctsrate0 = 0; /* ie, no hard-coded ctsrate */ + bf->bf_state.bfs_ctsrate = 0; /* calculated later */ + bf->bf_state.bfs_ctsduration = 0; + bf->bf_state.bfs_ismrr = ismrr; + + return 0; +} + +/* + * Direct-dispatch the current frame to the hardware. + * + * This can be called by the net80211 code. 
+ * + * XXX what about locking? Or, push the seqno assign into the + * XXX aggregate scheduler so its serialised? + */ +int +ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_buf *bf, struct mbuf *m0) +{ + struct ieee80211vap *vap = ni->ni_vap; + struct ath_vap *avp = ATH_VAP(vap); + int r; + u_int pri; + int tid; + struct ath_txq *txq; + int ismcast; + const struct ieee80211_frame *wh; + int is_ampdu, is_ampdu_tx, is_ampdu_pending; + ieee80211_seq seqno; + uint8_t type, subtype; + + /* + * Determine the target hardware queue. * - * NB: use >= to deal with sc_txintrperiod changing - * dynamically through sysctl. + * For multicast frames, the txq gets overridden to be the + * software TXQ and it's done via direct-dispatch. + * + * For any other frame, we do a TID/QoS lookup inside the frame + * to see what the TID should be. If it's a non-QoS frame, the + * AC and TID are overridden. The TID/TXQ code assumes the + * TID is on a predictable hardware TXQ, so we don't support + * having a node TID queued to multiple hardware TXQs. + * This may change in the future but would require some locking + * fudgery. 
*/ - if (flags & HAL_TXDESC_INTREQ) { - txq->axq_intrcnt = 0; - } else if (++txq->axq_intrcnt >= sc->sc_txintrperiod) { - flags |= HAL_TXDESC_INTREQ; - txq->axq_intrcnt = 0; - } + pri = ath_tx_getac(sc, m0); + tid = ath_tx_gettid(sc, m0); - if (ath_tx_is_11n(sc)) { - rate[0] = rix; - try[0] = try0; + txq = sc->sc_ac2q[pri]; + wh = mtod(m0, struct ieee80211_frame *); + ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); + type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; + subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; + + /* A-MPDU TX */ + is_ampdu_tx = ath_tx_ampdu_running(sc, ATH_NODE(ni), tid); + is_ampdu_pending = ath_tx_ampdu_pending(sc, ATH_NODE(ni), tid); + is_ampdu = is_ampdu_tx | is_ampdu_pending; + + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ac=%d, is_ampdu=%d\n", + __func__, tid, pri, is_ampdu); + + /* Multicast frames go onto the software multicast queue */ + if (ismcast) + txq = &avp->av_mcastq; + + if ((! is_ampdu) && (vap->iv_ps_sta || avp->av_mcastq.axq_depth)) + txq = &avp->av_mcastq; + + /* Do the generic frame setup */ + /* XXX should just bzero the bf_state? */ + bf->bf_state.bfs_dobaw = 0; + + /* A-MPDU TX? Manually set sequence number */ + /* Don't do it whilst pending; the net80211 layer still assigns them */ + /* XXX do we need locking here? */ + if (is_ampdu_tx) { + ATH_TXQ_LOCK(txq); + /* + * Always call; this function will + * handle making sure that null data frames + * don't get a sequence number from the current + * TID and thus mess with the BAW. + */ + seqno = ath_tx_tid_seqno_assign(sc, ni, bf, m0); + if (IEEE80211_QOS_HAS_SEQ(wh) && + subtype != IEEE80211_FC0_SUBTYPE_QOS_NULL) { + bf->bf_state.bfs_dobaw = 1; + } + ATH_TXQ_UNLOCK(txq); } /* - * Formulate first tx descriptor with tx controls. + * If needed, the sequence number has been assigned. + * Squirrel it away somewhere easy to get to. */ - /* XXX check return value? */ - /* XXX is this ok to call for 11n descriptors? 
*/ - /* XXX or should it go through the first, next, last 11n calls? */ - ath_hal_setuptxdesc(ah, ds - , pktlen /* packet length */ - , hdrlen /* header length */ - , atype /* Atheros packet type */ - , ni->ni_txpower /* txpower */ - , txrate, try0 /* series 0 rate/tries */ - , keyix /* key cache index */ - , sc->sc_txantenna /* antenna mode */ - , flags /* flags */ - , ctsrate /* rts/cts rate */ - , ctsduration /* rts/cts duration */ - ); - bf->bf_txflags = flags; + bf->bf_state.bfs_seqno = M_SEQNO_GET(m0) << IEEE80211_SEQ_SEQ_SHIFT; + + /* Is ampdu pending? fetch the seqno and print it out */ + if (is_ampdu_pending) + DPRINTF(sc, ATH_DEBUG_SW_TX, + "%s: tid %d: ampdu pending, seqno %d\n", + __func__, tid, M_SEQNO_GET(m0)); + + /* This also sets up the DMA map */ + r = ath_tx_normal_setup(sc, ni, bf, m0); + + if (r != 0) + return r; + + /* At this point m0 could have changed! */ + m0 = bf->bf_m; + +#if 1 /* - * Setup the multi-rate retry state only when we're - * going to use it. This assumes ath_hal_setuptxdesc - * initializes the descriptors (so we don't have to) - * when the hardware supports multi-rate retry and - * we don't use it. + * If it's a multicast frame, do a direct-dispatch to the + * destination hardware queue. Don't bother software + * queuing it. */ - if (ismrr) { - if (ath_tx_is_11n(sc)) - ath_rate_getxtxrates(sc, an, rix, rate, try); - else - ath_rate_setupxtxdesc(sc, an, ds, shortPreamble, rix); - } - - if (ath_tx_is_11n(sc)) { - ath_buf_set_rate(sc, ni, bf, pktlen, flags, ctsrate, (atype == HAL_PKT_TYPE_PSPOLL), rate, try); - } + /* + * If it's a BAR frame, do a direct dispatch to the + * destination hardware queue. Don't bother software + * queuing it, as the TID will now be paused. + * Sending a BAR frame can occur from the net80211 txa timer + * (ie, retries) or from the ath txtask (completion call.) 
+ * It queues directly to hardware because the TID is paused + * at this point (and won't be unpaused until the BAR has + * either been TXed successfully or max retries has been + * reached.) + */ + if (txq == &avp->av_mcastq) { + ATH_TXQ_LOCK(txq); + ath_tx_xmit_normal(sc, txq, bf); + ATH_TXQ_UNLOCK(txq); + } else if (type == IEEE80211_FC0_TYPE_CTL && + subtype == IEEE80211_FC0_SUBTYPE_BAR) { + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: BAR: TX'ing direct\n", __func__); + ATH_TXQ_LOCK(txq); + ath_tx_xmit_normal(sc, txq, bf); + ATH_TXQ_UNLOCK(txq); + } else { + /* add to software queue */ + ath_tx_swq(sc, ni, txq, bf); + } +#else + /* + * For now, since there's no software queue, + * direct-dispatch to the hardware. + */ + ATH_TXQ_LOCK(txq); + ath_tx_xmit_normal(sc, txq, bf); + ATH_TXQ_UNLOCK(txq); +#endif - ath_tx_handoff(sc, txq, bf); return 0; } @@ -885,17 +1475,15 @@ ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni, struct ieee80211vap *vap = ni->ni_vap; int error, ismcast, ismrr; int keyix, hdrlen, pktlen, try0, txantenna; - u_int8_t rix, cix, txrate, ctsrate, rate1, rate2, rate3; + u_int8_t rix, txrate; struct ieee80211_frame *wh; - u_int flags, ctsduration; + u_int flags; HAL_PKT_TYPE atype; const HAL_RATE_TABLE *rt; struct ath_desc *ds; u_int pri; - uint8_t try[4], rate[4]; - - bzero(try, sizeof(try)); - bzero(rate, sizeof(rate)); + int o_tid = -1; + int do_override; wh = mtod(m0, struct ieee80211_frame *); ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); @@ -907,14 +1495,24 @@ ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni, /* XXX honor IEEE80211_BPF_DATAPAD */ pktlen = m0->m_pkthdr.len - (hdrlen & 3) + IEEE80211_CRC_LEN; + + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: ismcast=%d\n", + __func__, ismcast); + /* Handle encryption twiddling if needed */ - if (! ath_tx_tag_crypto(sc, ni, m0, params->ibp_flags & IEEE80211_BPF_CRYPTO, 0, &hdrlen, &pktlen, &keyix)) { + if (! 
ath_tx_tag_crypto(sc, ni, + m0, params->ibp_flags & IEEE80211_BPF_CRYPTO, 0, + &hdrlen, &pktlen, &keyix)) { ath_freetx(m0); return EIO; } /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct ieee80211_frame *); + /* Do the generic frame setup */ + /* XXX should just bzero the bf_state? */ + bf->bf_state.bfs_dobaw = 0; + error = ath_tx_dmasetup(sc, bf, m0); if (error != 0) return error; @@ -926,8 +1524,11 @@ ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni, flags |= HAL_TXDESC_INTREQ; /* force interrupt */ if (params->ibp_flags & IEEE80211_BPF_RTS) flags |= HAL_TXDESC_RTSENA; - else if (params->ibp_flags & IEEE80211_BPF_CTS) + else if (params->ibp_flags & IEEE80211_BPF_CTS) { + /* XXX assume 11g/11n protection? */ + bf->bf_state.bfs_doprot = 1; flags |= HAL_TXDESC_CTSENA; + } /* XXX leave ismcast to injector? */ if ((params->ibp_flags & IEEE80211_BPF_NOACK) || ismcast) flags |= HAL_TXDESC_NOACK; @@ -945,23 +1546,18 @@ ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni, if (txantenna == 0) /* XXX? */ txantenna = sc->sc_txantenna; - ctsduration = 0; - if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA)) { - cix = ath_tx_findrix(sc, params->ibp_ctsrate); - ctsrate = ath_tx_get_rtscts_rate(ah, rt, rix, cix, params->ibp_flags & IEEE80211_BPF_SHORTPRE); - /* The 11n chipsets do ctsduration calculations for you */ - if (! ath_tx_is_11n(sc)) - ctsduration = ath_tx_calc_ctsduration(ah, rix, cix, - params->ibp_flags & IEEE80211_BPF_SHORTPRE, pktlen, - rt, flags); - /* - * Must disable multi-rate retry when using RTS/CTS. - */ - ismrr = 0; /* XXX */ - } else - ctsrate = 0; + /* + * Since ctsrate is fixed, store it away for later + * use when the descriptor fields are being set. + */ + if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA)) + bf->bf_state.bfs_ctsrate0 = params->ibp_ctsrate; pri = params->ibp_pri & 3; + /* Override pri if the frame isn't a QoS one */ + if (! 
IEEE80211_QOS_HAS_SEQ(wh)) + pri = ath_tx_getac(sc, m0); + /* * NB: we mark all packets as type PSPOLL so the h/w won't * set the sequence number, duration, etc. @@ -993,77 +1589,95 @@ ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni, */ ds = bf->bf_desc; /* XXX check return value? */ - ath_hal_setuptxdesc(ah, ds - , pktlen /* packet length */ - , hdrlen /* header length */ - , atype /* Atheros packet type */ - , params->ibp_power /* txpower */ - , txrate, try0 /* series 0 rate/tries */ - , keyix /* key cache index */ - , txantenna /* antenna mode */ - , flags /* flags */ - , ctsrate /* rts/cts rate */ - , ctsduration /* rts/cts duration */ - ); + + /* Store the decided rate index values away */ + bf->bf_state.bfs_pktlen = pktlen; + bf->bf_state.bfs_hdrlen = hdrlen; + bf->bf_state.bfs_atype = atype; + bf->bf_state.bfs_txpower = params->ibp_power; + bf->bf_state.bfs_txrate0 = txrate; + bf->bf_state.bfs_try0 = try0; + bf->bf_state.bfs_keyix = keyix; + bf->bf_state.bfs_txantenna = txantenna; + bf->bf_state.bfs_flags = flags; bf->bf_txflags = flags; + bf->bf_state.bfs_shpream = + !! 
(params->ibp_flags & IEEE80211_BPF_SHORTPRE); - if (ath_tx_is_11n(sc)) { - rate[0] = ath_tx_findrix(sc, params->ibp_rate0); - try[0] = params->ibp_try0; - - if (ismrr) { - /* Remember, rate[] is actually an array of rix's -adrian */ - rate[0] = ath_tx_findrix(sc, params->ibp_rate0); - rate[1] = ath_tx_findrix(sc, params->ibp_rate1); - rate[2] = ath_tx_findrix(sc, params->ibp_rate2); - rate[3] = ath_tx_findrix(sc, params->ibp_rate3); - - try[0] = params->ibp_try0; - try[1] = params->ibp_try1; - try[2] = params->ibp_try2; - try[3] = params->ibp_try3; - } - } else { - if (ismrr) { - rix = ath_tx_findrix(sc, params->ibp_rate1); - rate1 = rt->info[rix].rateCode; - if (params->ibp_flags & IEEE80211_BPF_SHORTPRE) - rate1 |= rt->info[rix].shortPreamble; - if (params->ibp_try2) { - rix = ath_tx_findrix(sc, params->ibp_rate2); - rate2 = rt->info[rix].rateCode; - if (params->ibp_flags & IEEE80211_BPF_SHORTPRE) - rate2 |= rt->info[rix].shortPreamble; - } else - rate2 = 0; - if (params->ibp_try3) { - rix = ath_tx_findrix(sc, params->ibp_rate3); - rate3 = rt->info[rix].rateCode; - if (params->ibp_flags & IEEE80211_BPF_SHORTPRE) - rate3 |= rt->info[rix].shortPreamble; - } else - rate3 = 0; - ath_hal_setupxtxdesc(ah, ds - , rate1, params->ibp_try1 /* series 1 */ - , rate2, params->ibp_try2 /* series 2 */ - , rate3, params->ibp_try3 /* series 3 */ - ); - } - } + /* XXX this should be done in ath_tx_setrate() */ + bf->bf_state.bfs_ctsrate = 0; + bf->bf_state.bfs_ctsduration = 0; + bf->bf_state.bfs_ismrr = ismrr; - if (ath_tx_is_11n(sc)) { - /* - * notice that rix doesn't include any of the "magic" flags txrate - * does for communicating "other stuff" to the HAL. 
- */ - ath_buf_set_rate(sc, ni, bf, pktlen, flags, ctsrate, (atype == HAL_PKT_TYPE_PSPOLL), rate, try); + /* Blank the legacy rate array */ + bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc)); + + bf->bf_state.bfs_rc[0].rix = + ath_tx_findrix(sc, params->ibp_rate0); + bf->bf_state.bfs_rc[0].tries = try0; + bf->bf_state.bfs_rc[0].ratecode = txrate; + + if (ismrr) { + int rix; + + rix = ath_tx_findrix(sc, params->ibp_rate1); + bf->bf_state.bfs_rc[1].rix = rix; + bf->bf_state.bfs_rc[1].tries = params->ibp_try1; + + rix = ath_tx_findrix(sc, params->ibp_rate2); + bf->bf_state.bfs_rc[2].rix = rix; + bf->bf_state.bfs_rc[2].tries = params->ibp_try2; + + rix = ath_tx_findrix(sc, params->ibp_rate3); + bf->bf_state.bfs_rc[3].rix = rix; + bf->bf_state.bfs_rc[3].tries = params->ibp_try3; } + /* + * All the required rate control decisions have been made; + * fill in the rc flags. + */ + ath_tx_rate_fill_rcflags(sc, bf); /* NB: no buffered multicast in power save support */ - ath_tx_handoff(sc, sc->sc_ac2q[pri], bf); + + /* XXX If it's an ADDBA, override the correct queue */ + do_override = ath_tx_action_frame_override_queue(sc, ni, m0, &o_tid); + + /* Map ADDBA to the correct priority */ + if (do_override) { +#if 0 + device_printf(sc->sc_dev, + "%s: overriding tid %d pri %d -> %d\n", + __func__, o_tid, pri, TID_TO_WME_AC(o_tid)); +#endif + pri = TID_TO_WME_AC(o_tid); + } + + /* + * If we're overiding the ADDBA destination, dump directly + * into the hardware queue, right after any pending + * frames to that node are. + */ + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: dooverride=%d\n", + __func__, do_override); + + if (do_override) { + ATH_TXQ_LOCK(sc->sc_ac2q[pri]); + ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf); + ATH_TXQ_UNLOCK(sc->sc_ac2q[pri]); + } else { + /* Queue to software queue */ + ath_tx_swq(sc, ni, sc->sc_ac2q[pri], bf); + } + return 0; } +/* + * Send a raw frame. + * + * This can be called by net80211. 
+ */ int ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) @@ -1127,3 +1741,2093 @@ bad: ieee80211_free_node(ni); return error; } + +/* Some helper functions */ + +/* + * ADDBA (and potentially others) need to be placed in the same + * hardware queue as the TID/node it's relating to. This is so + * it goes out after any pending non-aggregate frames to the + * same node/TID. + * + * If this isn't done, the ADDBA can go out before the frames + * queued in hardware. Even though these frames have a sequence + * number -earlier- than the ADDBA can be transmitted (but + * no frames whose sequence numbers are after the ADDBA should + * be!) they'll arrive after the ADDBA - and the receiving end + * will simply drop them as being out of the BAW. + * + * The frames can't be appended to the TID software queue - it'll + * never be sent out. So these frames have to be directly + * dispatched to the hardware, rather than queued in software. + * So if this function returns true, the TXQ has to be + * overridden and it has to be directly dispatched. + * + * It's a dirty hack, but someone's gotta do it. + */ + +/* + * XXX doesn't belong here! + */ +static int +ieee80211_is_action(struct ieee80211_frame *wh) +{ + /* Type: Management frame? */ + if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) != + IEEE80211_FC0_TYPE_MGT) + return 0; + + /* Subtype: Action frame? */ + if ((wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) != + IEEE80211_FC0_SUBTYPE_ACTION) + return 0; + + return 1; +} + +#define MS(_v, _f) (((_v) & _f) >> _f##_S) +/* + * Return an alternate TID for ADDBA request frames. + * + * Yes, this likely should be done in the net80211 layer. 
+ */ +static int +ath_tx_action_frame_override_queue(struct ath_softc *sc, + struct ieee80211_node *ni, + struct mbuf *m0, int *tid) +{ + struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *); + struct ieee80211_action_ba_addbarequest *ia; + uint8_t *frm; + uint16_t baparamset; + + /* Not action frame? Bail */ + if (! ieee80211_is_action(wh)) + return 0; + + /* XXX Not needed for frames we send? */ +#if 0 + /* Correct length? */ + if (! ieee80211_parse_action(ni, m)) + return 0; +#endif + + /* Extract out action frame */ + frm = (u_int8_t *)&wh[1]; + ia = (struct ieee80211_action_ba_addbarequest *) frm; + + /* Not ADDBA? Bail */ + if (ia->rq_header.ia_category != IEEE80211_ACTION_CAT_BA) + return 0; + if (ia->rq_header.ia_action != IEEE80211_ACTION_BA_ADDBA_REQUEST) + return 0; + + /* Extract TID, return it */ + baparamset = le16toh(ia->rq_baparamset); + *tid = (int) MS(baparamset, IEEE80211_BAPS_TID); + + return 1; +} +#undef MS + +/* Per-node software queue operations */ + +/* + * Add the current packet to the given BAW. + * It is assumed that the current packet + * + * + fits inside the BAW; + * + already has had a sequence number allocated. + * + * Since the BAW status may be modified by both the ath task and + * the net80211/ifnet contexts, the TID must be locked. + */ +void +ath_tx_addto_baw(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid, struct ath_buf *bf) +{ + int index, cindex; + struct ieee80211_tx_ampdu *tap; + + ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]); + + if (bf->bf_state.bfs_isretried) + return; + + tap = ath_tx_get_tx_tid(an, tid->tid); + + if (bf->bf_state.bfs_addedbaw) + device_printf(sc->sc_dev, + "%s: re-added? tid=%d, seqno %d; window %d:%d; baw head=%d tail=%d\n", + __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno), + tap->txa_start, tap->txa_wnd, tid->baw_head, tid->baw_tail); + + /* + * ni->ni_txseqs[] is the currently allocated seqno. + * the txa state contains the current baw start. 
+ */ + index = ATH_BA_INDEX(tap->txa_start, SEQNO(bf->bf_state.bfs_seqno)); + cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1); + DPRINTF(sc, ATH_DEBUG_SW_TX_BAW, + "%s: tid=%d, seqno %d; window %d:%d; index=%d cindex=%d baw head=%d tail=%d\n", + __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno), + tap->txa_start, tap->txa_wnd, index, cindex, tid->baw_head, tid->baw_tail); + + +#if 0 + assert(tid->tx_buf[cindex] == NULL); +#endif + if (tid->tx_buf[cindex] != NULL) { + device_printf(sc->sc_dev, + "%s: ba packet dup (index=%d, cindex=%d, " + "head=%d, tail=%d)\n", + __func__, index, cindex, tid->baw_head, tid->baw_tail); + device_printf(sc->sc_dev, + "%s: BA bf: %p; seqno=%d ; new bf: %p; seqno=%d\n", + __func__, + tid->tx_buf[cindex], + SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno), + bf, + SEQNO(bf->bf_state.bfs_seqno) + ); + } + tid->tx_buf[cindex] = bf; + + if (index >= ((tid->baw_tail - tid->baw_head) & (ATH_TID_MAX_BUFS - 1))) { + tid->baw_tail = cindex; + INCR(tid->baw_tail, ATH_TID_MAX_BUFS); + } +} + +/* + * seq_start - left edge of BAW + * seq_next - current/next sequence number to allocate + * + * Since the BAW status may be modified by both the ath task and + * the net80211/ifnet contexts, the TID must be locked. 
+ */
+/*
+ * Remove a frame from the TID's BAW tracking ring.
+ *
+ * The slot for bf's sequence number is cleared, then baw_head and
+ * tap->txa_start are advanced past every leading empty slot until
+ * either an occupied slot or baw_tail is reached.
+ *
+ * A slot that does not hold 'bf' is reported via device_printf() but
+ * is still cleared.  The slot may legitimately be empty (NULL) at
+ * that point, so the diagnostic must not dereference it blindly.
+ *
+ * The TXQ lock for the TID's access category must be held.
+ */
+static void
+ath_tx_update_baw(struct ath_softc *sc, struct ath_node *an,
+    struct ath_tid *tid, const struct ath_buf *bf)
+{
+	int index, cindex;
+	struct ieee80211_tx_ampdu *tap;
+	int seqno = SEQNO(bf->bf_state.bfs_seqno);
+
+	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
+
+	tap = ath_tx_get_tx_tid(an, tid->tid);
+	index = ATH_BA_INDEX(tap->txa_start, seqno);
+	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
+	    "%s: tid=%d, baw=%d:%d, seqno=%d, index=%d, cindex=%d, baw head=%d, tail=%d\n",
+	    __func__, tid->tid, tap->txa_start, tap->txa_wnd, seqno, index,
+	    cindex, tid->baw_head, tid->baw_tail);
+
+	/*
+	 * If this occurs then we have a big problem - something else
+	 * has slid tap->txa_start along without updating the BAW
+	 * tracking start/end pointers. Thus the TX BAW state is now
+	 * completely busted.
+	 *
+	 * But for now, since I haven't yet fixed TDMA and buffer cloning,
+	 * it's quite possible that a cloned buffer is making its way
+	 * here and causing it to fire off. Disable TDMA for now.
+	 *
+	 * The slot can also simply be empty (the frame was never added,
+	 * or the slot was already cleared), so only read the slot's
+	 * sequence number when there is a buffer there; print -1
+	 * otherwise instead of dereferencing a NULL pointer.
+	 */
+	if (tid->tx_buf[cindex] != bf) {
+		device_printf(sc->sc_dev,
+		    "%s: comp bf=%p, seq=%d; slot bf=%p, seqno=%d\n",
+		    __func__,
+		    bf, SEQNO(bf->bf_state.bfs_seqno),
+		    tid->tx_buf[cindex],
+		    (tid->tx_buf[cindex] != NULL) ?
+		      (int) SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno) : -1);
+	}
+
+	tid->tx_buf[cindex] = NULL;
+
+	/* Slide the window's left edge over any leading empty slots */
+	while (tid->baw_head != tid->baw_tail && !tid->tx_buf[tid->baw_head]) {
+		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
+		INCR(tid->baw_head, ATH_TID_MAX_BUFS);
+	}
+	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW, "%s: baw is now %d:%d, baw head=%d\n",
+	    __func__, tap->txa_start, tap->txa_wnd, tid->baw_head);
+}
+
+/*
+ * Mark the current node/TID as ready to TX.
+ *
+ * This is done to make it easy for the software scheduler to
+ * find which nodes have data to send.
+ *
+ * The TXQ lock must be held.
+ */
+static void
+ath_tx_tid_sched(struct ath_softc *sc, struct ath_tid *tid)
+{
+	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
+
+	ATH_TXQ_LOCK_ASSERT(txq);
+
+	if (tid->paused)
+		return;		/* paused, can't schedule yet */
+
+	if (tid->sched)
+		return;		/* already scheduled */
+
+	tid->sched = 1;
+
+	TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);	/* join the TXQ's TID list */
+}
+
+/*
+ * Mark the current TID as no longer needing to be polled for
+ * TX packets, removing it from the TXQ's axq_tidq list.
+ *
+ * This is a no-op if the TID is not currently scheduled.
+ *
+ * The TXQ lock must be held.
+ */
+static void
+ath_tx_tid_unsched(struct ath_softc *sc, struct ath_tid *tid)
+{
+	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
+
+	ATH_TXQ_LOCK_ASSERT(txq);
+
+	if (tid->sched == 0)
+		return;		/* not on the list, nothing to remove */
+
+	tid->sched = 0;
+	TAILQ_REMOVE(&txq->axq_tidq, tid, axq_qelem);
+}
+
+/*
+ * Assign a sequence number manually to the given frame.
+ *
+ * The TID is derived from the mbuf's WME access category.  QoS NULL
+ * data frames take their number from ni_txseqs[IEEE80211_NONQOS_TID];
+ * every other frame takes it from ni_txseqs[tid].  The counter used is
+ * post-incremented, and the chosen number is written into the 802.11
+ * header (i_seq) and recorded on the mbuf via M_SEQNO_SET().
+ *
+ * Returns the assigned sequence number, or -1 when
+ * IEEE80211_QOS_HAS_SEQ() reports that the frame does not carry one.
+ * NOTE(review): the -1 is returned through ieee80211_seq; if that type
+ * is unsigned, callers cannot detect it with a "< 0" test - confirm.
+ *
+ * 'bf' is not referenced by this function.
+ *
+ * This should only be called for A-MPDU TX frames.
+ */
+static ieee80211_seq
+ath_tx_tid_seqno_assign(struct ath_softc *sc, struct ieee80211_node *ni,
+    struct ath_buf *bf, struct mbuf *m0)
+{
+	struct ieee80211_frame *wh;
+	int tid, pri;
+	ieee80211_seq seqno;
+	uint8_t subtype;
+
+	/* TID lookup */
+	wh = mtod(m0, struct ieee80211_frame *);
+	pri = M_WME_GETAC(m0);			/* honor classification */
+	tid = WME_AC_TO_TID(pri);
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pri=%d, tid=%d, qos has seq=%d\n",
+	    __func__, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
+
+	/* XXX Is it a control frame? Ignore */
+
+	/* Does the packet require a sequence number? */
+	if (! IEEE80211_QOS_HAS_SEQ(wh))
+		return -1;
+
+	/*
+	 * Is it a QOS NULL Data frame? Give it a sequence number from
+	 * the default TID (IEEE80211_NONQOS_TID.)
+	 *
+	 * The RX path of everything I've looked at doesn't include the NULL
+	 * data frame sequence number in the aggregation state updates, so
+	 * assigning it a sequence number there will cause a BAW hole on the
+	 * RX side.
+	 */
+	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
+	if (subtype == IEEE80211_FC0_SUBTYPE_QOS_NULL) {
+		seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID];
+		INCR(ni->ni_txseqs[IEEE80211_NONQOS_TID], IEEE80211_SEQ_RANGE);
+	} else {
+		/* Manually assign sequence number */
+		seqno = ni->ni_txseqs[tid];
+		INCR(ni->ni_txseqs[tid], IEEE80211_SEQ_RANGE);
+	}
+	*(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);	/* stored little-endian in the header */
+	M_SEQNO_SET(m0, seqno);
+
+	/* Return so caller can do something with it if needed */
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: -> seqno=%d\n", __func__, seqno);
+	return seqno;
+}
+
+/*
+ * Attempt to direct dispatch an aggregate-session frame to hardware.
+ *
+ * If the TID is paused, the frame is appended to the TID's software
+ * queue (the TID is not scheduled in that case).
+ * If the frame takes part in BAW tracking and is outside the BAW,
+ * it is appended to the software queue and the TID is scheduled.
+ * Otherwise the frame is set up and handed to the hardware queue as
+ * a single frame, with ath_tx_aggr_comp as its completion handler.
+ *
+ * The TXQ lock of bf's bfs_txq must be held (asserted below).
+ */
+static void
+ath_tx_xmit_aggr(struct ath_softc *sc, struct ath_node *an, struct ath_buf *bf)
+{
+	struct ath_tid *tid = &an->an_tid[bf->bf_state.bfs_tid];
+	struct ath_txq *txq = bf->bf_state.bfs_txq;
+	struct ieee80211_tx_ampdu *tap;
+
+	ATH_TXQ_LOCK_ASSERT(txq);
+
+	tap = ath_tx_get_tx_tid(an, tid->tid);
+
+	/* paused? queue */
+	if (tid->paused) {
+		ATH_TXQ_INSERT_TAIL(tid, bf, bf_list);
+		return;
+	}
+
+	/* outside baw? queue */
+	if (bf->bf_state.bfs_dobaw &&
+	    (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
+	    SEQNO(bf->bf_state.bfs_seqno)))) {
+		ATH_TXQ_INSERT_TAIL(tid, bf, bf_list);
+		ath_tx_tid_sched(sc, tid);
+		return;
+	}
+
+	/* Direct dispatch to hardware */
+	ath_tx_do_ratelookup(sc, bf);
+	ath_tx_rate_fill_rcflags(sc, bf);
+	ath_tx_set_rtscts(sc, bf);
+	ath_tx_setds(sc, bf);
+	ath_tx_set_ratectrl(sc, bf->bf_node, bf);
+	ath_tx_chaindesclist(sc, bf);
+
+	/* Statistics */
+	sc->sc_aggr_stats.aggr_low_hwq_single_pkt++;
+
+	/* Track per-TID hardware queue depth correctly */
+	tid->hwq_depth++;
+
+	/* Add to BAW */
+	if (bf->bf_state.bfs_dobaw) {
+		ath_tx_addto_baw(sc, an, tid, bf);
+		bf->bf_state.bfs_addedbaw = 1;
+	}
+
+	/* Set completion handler, multi-frame aggregate or not */
+	bf->bf_comp = ath_tx_aggr_comp;
+
+	/* Hand off to hardware */
+	ath_tx_handoff(sc, txq, bf);
+}
+
+/*
+ * Attempt to send the packet.
+ * If the queue isn't busy, direct-dispatch.
+ * If the queue is busy enough, queue the given packet on the
+ * relevant software queue.
+ *
+ * The frame's TID and access category are looked up from the mbuf and
+ * stored, together with 'txq', in bf->bf_state before the decision is
+ * made.  This function takes and releases the TXQ lock itself.
+ */
+void
+ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_txq *txq,
+    struct ath_buf *bf)
+{
+	struct ath_node *an = ATH_NODE(ni);
+	struct ieee80211_frame *wh;
+	struct ath_tid *atid;
+	int pri, tid;
+	struct mbuf *m0 = bf->bf_m;
+
+	/* Fetch the TID - non-QoS frames get assigned to TID 16 */
+	wh = mtod(m0, struct ieee80211_frame *);
+	pri = ath_tx_getac(sc, m0);
+	tid = ath_tx_gettid(sc, m0);
+	atid = &an->an_tid[tid];
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p, pri=%d, tid=%d, qos=%d\n",
+	    __func__, bf, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
+
+	/* Set local packet state, used to queue packets to hardware */
+	bf->bf_state.bfs_tid = tid;
+	bf->bf_state.bfs_txq = txq;
+	bf->bf_state.bfs_pri = pri;
+
+	/*
+	 * If the TID is paused, or an A-MPDU session is pending, software
+	 * queue the frame without scheduling the TID.
+	 * If the hardware queue isn't busy (axq_depth below sc_hwq_limit),
+	 * dispatch the frame directly - via the aggregate path when an
+	 * A-MPDU session is running (which may itself software queue
+	 * traffic that is outside the BAW), via the normal path otherwise.
+	 * If the hardware queue is busy, software queue the frame and
+	 * schedule the TID.
+	 */
+	ATH_TXQ_LOCK(txq);
+	if (atid->paused) {
+		/* TID is paused, queue */
+		ATH_TXQ_INSERT_TAIL(atid, bf, bf_list);
+	} else if (ath_tx_ampdu_pending(sc, an, tid)) {
+		/* AMPDU pending; queue */
+		ATH_TXQ_INSERT_TAIL(atid, bf, bf_list);
+		/* XXX sched? */
+	} else if (ath_tx_ampdu_running(sc, an, tid)) {
+		/* AMPDU running, attempt direct dispatch if possible */
+		if (txq->axq_depth < sc->sc_hwq_limit)
+			ath_tx_xmit_aggr(sc, an, bf);
+		else {
+			ATH_TXQ_INSERT_TAIL(atid, bf, bf_list);
+			ath_tx_tid_sched(sc, atid);
+		}
+	} else if (txq->axq_depth < sc->sc_hwq_limit) {
+		/* AMPDU not running, attempt direct dispatch */
+		ath_tx_xmit_normal(sc, txq, bf);
+	} else {
+		/* Busy; queue */
+		ATH_TXQ_INSERT_TAIL(atid, bf, bf_list);
+		ath_tx_tid_sched(sc, atid);
+	}
+	ATH_TXQ_UNLOCK(txq);
+}
+
+/*
+ * Do the basic frame setup stuff that's required before the frame
+ * is added to a software queue.
+ *
+ * All frames get mostly the same treatment and it's done once.
+ * Retransmits fiddle with things like the rate control setup,
+ * setting the retransmit bit in the packet; doing relevant DMA/bus
+ * syncing and relinking it (back) into the hardware TX queue.
+ *
+ * Note that this may cause the mbuf to be reallocated, so
+ * m0 may not be valid.
+ */
+
+
+/*
+ * Configure the per-TID node state.
+ *
+ * This likely belongs in if_ath_node.c but I can't think of anywhere
+ * else to put it just yet.
+ *
+ * This initialises the per-TID software queue and BAW tracking state.
+ */ +void +ath_tx_tid_init(struct ath_softc *sc, struct ath_node *an) +{ + int i, j; + struct ath_tid *atid; + + for (i = 0; i < IEEE80211_TID_SIZE; i++) { + atid = &an->an_tid[i]; + TAILQ_INIT(&atid->axq_q); + atid->tid = i; + atid->an = an; + for (j = 0; j < ATH_TID_MAX_BUFS; j++) + atid->tx_buf[j] = NULL; + atid->baw_head = atid->baw_tail = 0; + atid->paused = 0; + atid->sched = 0; + atid->hwq_depth = 0; + atid->cleanup_inprogress = 0; + if (i == IEEE80211_NONQOS_TID) + atid->ac = WME_AC_BE; + else + atid->ac = TID_TO_WME_AC(i); + } +} + +/* + * Pause the current TID. This stops packets from being transmitted + * on it. + * + * Since this is also called from upper layers as well as the driver, + * it will get the TID lock. + */ +static void +ath_tx_tid_pause(struct ath_softc *sc, struct ath_tid *tid) +{ + ATH_TXQ_LOCK(sc->sc_ac2q[tid->ac]); + tid->paused++; + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: paused = %d\n", + __func__, tid->paused); + ATH_TXQ_UNLOCK(sc->sc_ac2q[tid->ac]); +} + +/* + * Unpause the current TID, and schedule it if needed. + */ +static void +ath_tx_tid_resume(struct ath_softc *sc, struct ath_tid *tid) +{ + ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]); + + tid->paused--; + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: unpaused = %d\n", + __func__, tid->paused); + + if (tid->paused || tid->axq_depth == 0) { + return; + } + + ath_tx_tid_sched(sc, tid); + /* Punt some frames to the hardware if needed */ + ath_txq_sched(sc, sc->sc_ac2q[tid->ac]); +} + +/* + * Free any packets currently pending in the software TX queue. + * + * This will be called when a node is being deleted. + * + * It can also be called on an active node during an interface + * reset or state transition. + * + * (From Linux/reference): + * + * TODO: For frame(s) that are in the retry state, we will reuse the + * sequence number(s) without setting the retry bit. The + * alternative is to give up on these and BAR the receiver's window + * forward. 
+ */
+static void
+ath_tx_tid_drain(struct ath_softc *sc, struct ath_node *an, struct ath_tid *tid,
+    ath_bufhead *bf_cq)
+{
+	struct ath_buf *bf;
+	struct ieee80211_tx_ampdu *tap;
+	struct ieee80211_node *ni = &an->an_node;
+	int t = 0;	/* non-zero once the one-off state dump below was printed */
+	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
+
+	tap = ath_tx_get_tx_tid(an, tid->tid);
+
+	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
+
+	/*
+	 * Walk the software queue, moving every frame onto bf_cq.
+	 * The frames are not completed here; that is left to the
+	 * caller, which owns bf_cq.
+	 */
+	for (;;) {
+		bf = TAILQ_FIRST(&tid->axq_q);
+		if (bf == NULL) {
+			break;
+		}
+
+		if (t == 0) {
+			device_printf(sc->sc_dev,
+			    "%s: node %p: tid %d: txq_depth=%d, "
+			    "txq_aggr_depth=%d, sched=%d, paused=%d, "
+			    "hwq_depth=%d, incomp=%d, baw_head=%d, baw_tail=%d "
+			    "txa_start=%d, ni_txseqs=%d\n",
+			     __func__, ni, tid->tid, txq->axq_depth,
+			     txq->axq_aggr_depth, tid->sched, tid->paused,
+			     tid->hwq_depth, tid->incomp, tid->baw_head,
+			     tid->baw_tail, tap == NULL ? -1 : tap->txa_start,
+			     ni->ni_txseqs[tid->tid]);
+			t = 1;
+		}
+
+
+		/*
+		 * If the current TID is running AMPDU, update
+		 * the BAW.
+		 */
+		if (ath_tx_ampdu_running(sc, an, tid->tid) &&
+		    bf->bf_state.bfs_dobaw) {
+			/*
+			 * Only remove the frame from the BAW if it's
+			 * been transmitted at least once; this means
+			 * the frame was in the BAW to begin with.
+			 */
+			if (bf->bf_state.bfs_retries > 0) {
+				ath_tx_update_baw(sc, an, tid, bf);
+				bf->bf_state.bfs_dobaw = 0;
+			}
+			/*
+			 * This has become a non-fatal error now
+			 */
+			if (! bf->bf_state.bfs_addedbaw)
+				device_printf(sc->sc_dev,
+				    "%s: wasn't added: seqno %d\n",
+				    __func__, SEQNO(bf->bf_state.bfs_seqno));
+		}
+		ATH_TXQ_REMOVE(tid, bf, bf_list);
+		TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
+	}
+
+	/*
+	 * Now that the queue is empty, and with the TXQ lock still held
+	 * (it is asserted above, not taken here), update the sequence
+	 * number and BAW window.
+	 * Because sequence numbers have been assigned to frames
+	 * that haven't been sent yet, it's entirely possible
+	 * we'll be called with some pending frames that have not
+	 * been transmitted.
+	 *
+	 * The cleaner solution is to do the sequence number allocation
+	 * when the packet is first transmitted - and thus the "retries"
+	 * check above would be enough to update the BAW/seqno.
+	 */
+
+	/* But don't do it for non-QoS TIDs */
+	if (tap) {
+#if 0
+		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
+		    "%s: node %p: TID %d: sliding BAW left edge to %d\n",
+		    __func__, an, tid->tid, tap->txa_start);
+#endif
+		ni->ni_txseqs[tid->tid] = tap->txa_start;	/* rewind the seqno allocator to the window start */
+		tid->baw_tail = tid->baw_head;			/* mark the tracking ring as empty */
+	}
+}
+
+/*
+ * Flush all software queued packets for the given node.
+ *
+ * This occurs when a completion handler frees the last buffer
+ * for a node, and the node is thus freed.  This causes the node
+ * to be cleaned up, which ends up calling ath_tx_node_flush.
+ *
+ * Each TID is unscheduled and drained under its own TXQ lock; the
+ * collected frames are completed afterwards with no TXQ lock held.
+ */
+void
+ath_tx_node_flush(struct ath_softc *sc, struct ath_node *an)
+{
+	int tid;
+	ath_bufhead bf_cq;
+	struct ath_buf *bf;
+
+	TAILQ_INIT(&bf_cq);
+
+	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
+		struct ath_tid *atid = &an->an_tid[tid];
+		struct ath_txq *txq = sc->sc_ac2q[atid->ac];
+
+		/* Remove this tid from the list of active tids */
+		ATH_TXQ_LOCK(txq);
+		ath_tx_tid_unsched(sc, atid);
+
+		/* Move its queued packets onto bf_cq */
+		ath_tx_tid_drain(sc, an, atid, &bf_cq);
+		ATH_TXQ_UNLOCK(txq);
+	}
+
+	/* Complete the collected frames through the default handler (fail=0) */
+	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
+		TAILQ_REMOVE(&bf_cq, bf, bf_list);
+		ath_tx_default_comp(sc, bf, 0);
+	}
+}
+
+/*
+ * Drain the software queue of every TID currently scheduled on the
+ * given TXQ.
+ *
+ * Only TIDs that are on txq->axq_tidq are visited.  The frames are
+ * collected under the TXQ lock and completed after it is released.
+ */
+void
+ath_tx_txq_drain(struct ath_softc *sc, struct ath_txq *txq)
+{
+	struct ath_tid *tid;
+	ath_bufhead bf_cq;
+	struct ath_buf *bf;
+
+	TAILQ_INIT(&bf_cq);
+	ATH_TXQ_LOCK(txq);
+
+	/*
+	 * Iterate over all active tids for the given txq,
+	 * flushing and unsched'ing them.  Unscheduling removes the
+	 * TID from axq_tidq, which is what lets this loop terminate.
+	 */
+	while (! TAILQ_EMPTY(&txq->axq_tidq)) {
+		tid = TAILQ_FIRST(&txq->axq_tidq);
+		ath_tx_tid_drain(sc, tid->an, tid, &bf_cq);
+		ath_tx_tid_unsched(sc, tid);
+	}
+
+	ATH_TXQ_UNLOCK(txq);
+
+	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
+		TAILQ_REMOVE(&bf_cq, bf, bf_list);
+		ath_tx_default_comp(sc, bf, 0);
+	}
+}
+
+/*
+ * Handle completion of non-aggregate session frames.
+ *
+ * Decrements the TID's hardware queue depth under the TXQ lock
+ * (logging if it goes negative), feeds the result to rate control
+ * when appropriate, then passes the buffer to the default completion
+ * handler with the caller's 'fail' value.
+ */
+void
+ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
+{
+	struct ieee80211_node *ni = bf->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	int tid = bf->bf_state.bfs_tid;
+	struct ath_tid *atid = &an->an_tid[tid];
+	struct ath_tx_status *ts = &bf->bf_status.ds_txstat;
+
+	/* The TID state is protected behind the TXQ lock */
+	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d, hwq_depth now %d\n",
+	    __func__, bf, fail, atid->hwq_depth - 1);
+
+	atid->hwq_depth--;
+	if (atid->hwq_depth < 0)
+		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
+		    __func__, atid->hwq_depth);
+	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+
+	/*
+	 * punt to rate control if we're not being cleaned up
+	 * during a hw queue drain and the frame wanted an ACK.
+	 * One frame is reported; it counts as bad when ts_status
+	 * is non-zero.
+	 */
+	if (fail == 0 && ((bf->bf_txflags & HAL_TXDESC_NOACK) == 0))
+		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
+		    ts, bf->bf_state.bfs_pktlen,
+		    1, (ts->ts_status == 0) ? 0 : 1);
+
+	ath_tx_default_comp(sc, bf, fail);
+}
+
+/*
+ * Handle cleanup of aggregate session packets that aren't
+ * an A-MPDU.
+ *
+ * There's no need to update the BAW here - the session is being
+ * torn down.
+ */
+static void
+ath_tx_comp_cleanup_unaggr(struct ath_softc *sc, struct ath_buf *bf)
+{
+	struct ieee80211_node *ni = bf->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	int tid = bf->bf_state.bfs_tid;
+	struct ath_tid *atid = &an->an_tid[tid];
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: TID %d: incomp=%d\n",
+	    __func__, tid, atid->incomp);
+
+	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+	atid->incomp--;		/* one fewer outstanding frame to wait for */
+	if (atid->incomp == 0) {
+		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
+		    "%s: TID %d: cleaned up! resume!\n",
+		    __func__, tid);
+		atid->cleanup_inprogress = 0;
+		ath_tx_tid_resume(sc, atid);
+	}
+	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+
+	ath_tx_default_comp(sc, bf, 0);
+}
+
+/*
+ * Performs transmit side cleanup when TID changes from aggregated to
+ * unaggregated.
+ *
+ * - Discard all retry frames from the s/w queue; they are completed
+ *   through the default completion handler with fail=1 once the TXQ
+ *   lock has been dropped.
+ * - Fix the tx completion function for all other buffers in the
+ *   s/w queue.
+ * - Count the number of unacked frames still tracked in the BAW
+ *   (atid->incomp), and let transmit completion handle them later.
+ *
+ * If no such frames exist the TID is resumed immediately; otherwise
+ * cleanup_inprogress stays set.
+ *
+ * The caller is responsible for pausing the TID.
+ */
+static void
+ath_tx_cleanup(struct ath_softc *sc, struct ath_node *an, int tid)
+{
+	struct ath_tid *atid = &an->an_tid[tid];
+	struct ieee80211_tx_ampdu *tap;
+	struct ath_buf *bf, *bf_next;
+	ath_bufhead bf_cq;
+
+	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
+	    "%s: TID %d: called\n", __func__, tid);
+
+	TAILQ_INIT(&bf_cq);
+	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+
+	/*
+	 * Update the frames in the software TX queue:
+	 *
+	 * + Discard retry frames in the queue
+	 * + Fix the completion function to be non-aggregate
+	 */
+	bf = TAILQ_FIRST(&atid->axq_q);
+	while (bf) {
+		if (bf->bf_state.bfs_isretried) {
+			bf_next = TAILQ_NEXT(bf, bf_list);	/* fetch before unlinking bf */
+			TAILQ_REMOVE(&atid->axq_q, bf, bf_list);
+			atid->axq_depth--;
+			if (bf->bf_state.bfs_dobaw) {
+				ath_tx_update_baw(sc, an, atid, bf);
+				if (! bf->bf_state.bfs_addedbaw)
+					device_printf(sc->sc_dev,
+					    "%s: wasn't added: seqno %d\n",
+					    __func__, SEQNO(bf->bf_state.bfs_seqno));
+			}
+			bf->bf_state.bfs_dobaw = 0;
+			/*
+			 * Defer the frame to bf_cq; it is passed to the
+			 * default completion handler with "fail" set at
+			 * the end of this function, just so upper levels
+			 * are suitably notified about this.
+			 */
+			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
+			bf = bf_next;
+			continue;
+		}
+		/* Give these the non-aggregate completion handler */
+		bf->bf_comp = ath_tx_normal_comp;
+		bf = TAILQ_NEXT(bf, bf_list);
+	}
+
+	/* The caller is required to pause the TID */
+#if 0
+	/* Pause the TID */
+	ath_tx_tid_pause(sc, atid);
+#endif
+
+	/*
+	 * Calculate what hardware-queued frames exist based
+	 * on the current BAW size. Ie, what frames have been
+	 * added to the TX hardware queue for this TID but
+	 * not yet ACKed.
+	 */
+	tap = ath_tx_get_tx_tid(an, tid);
+	/* Need the lock - fiddling with BAW */
+	while (atid->baw_head != atid->baw_tail) {
+		if (atid->tx_buf[atid->baw_head]) {
+			atid->incomp++;
+			atid->cleanup_inprogress = 1;
+			atid->tx_buf[atid->baw_head] = NULL;
+		}
+		INCR(atid->baw_head, ATH_TID_MAX_BUFS);
+		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
+	}
+
+	/*
+	 * If cleanup is required, defer TID scheduling
+	 * until all the HW queued packets have been
+	 * sent.
+	 */
+	if (! atid->cleanup_inprogress)
+		ath_tx_tid_resume(sc, atid);
+
+	if (atid->cleanup_inprogress)
+		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
+		    "%s: TID %d: cleanup needed: %d packets\n",
+		    __func__, tid, atid->incomp);
+	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+
+	/* Handle completing frames and fail them */
+	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
+		TAILQ_REMOVE(&bf_cq, bf, bf_list);
+		ath_tx_default_comp(sc, bf, 1);
+	}
+}
+/*
+ * Mark a frame as a software retry.
+ *
+ * The first time through, the 802.11 RETRY bit is set in the frame
+ * header and the DMA map is synced so the change is visible to the
+ * hardware.  Every call bumps the software retry statistic and the
+ * buffer's retry count.
+ */
+static void
+ath_tx_set_retry(struct ath_softc *sc, struct ath_buf *bf)
+{
+	struct ieee80211_frame *wh;
+
+	wh = mtod(bf->bf_m, struct ieee80211_frame *);
+	/* Only update/resync if needed */
+	if (bf->bf_state.bfs_isretried == 0) {
+		wh->i_fc[1] |= IEEE80211_FC1_RETRY;
+		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
+		    BUS_DMASYNC_PREWRITE);
+	}
+	sc->sc_stats.ast_tx_swretries++;
+	bf->bf_state.bfs_isretried = 1;
+	bf->bf_state.bfs_retries ++;
+}
+/*
+ * Replace a buffer that cannot be reused directly with a clone.
+ *
+ * On success the original buffer is released (after detaching its
+ * mbuf and node pointers) and the clone is returned.  On failure
+ * NULL is returned and the original buffer is left untouched; if
+ * the DMA setup of the clone failed, the clone is put back at the
+ * head of sc->sc_txbuf.
+ *
+ * NOTE(review): nothing here repoints a tid->tx_buf[] BAW slot that
+ * still references the original buffer - confirm how callers cope.
+ */
+static struct ath_buf *
+ath_tx_retry_clone(struct ath_softc *sc, struct ath_buf *bf)
+{
+	struct ath_buf *nbf;
+	int error;
+
+	nbf = ath_buf_clone(sc, bf);
+
+#if 0
+	device_printf(sc->sc_dev, "%s: ATH_BUF_BUSY; cloning\n",
+	    __func__);
+#endif
+
+	if (nbf == NULL) {
+		/* Failed to clone */
+		device_printf(sc->sc_dev,
+		    "%s: failed to clone a busy buffer\n",
+		    __func__);
+		return NULL;
+	}
+
+	/* Setup the dma for the new buffer */
+	error = ath_tx_dmasetup(sc, nbf, nbf->bf_m);
+	if (error != 0) {
+		device_printf(sc->sc_dev,
+		    "%s: failed to setup dma for clone\n",
+		    __func__);
+		/*
+		 * Put this at the head of the list, not tail;
+		 * that way it doesn't interfere with the
+		 * busy buffer logic (which uses the tail of
+		 * the list.)
+		 */
+		ATH_TXBUF_LOCK(sc);
+		TAILQ_INSERT_HEAD(&sc->sc_txbuf, nbf, bf_list);
+		ATH_TXBUF_UNLOCK(sc);
+		return NULL;
+	}
+
+	/* Free the original buffer; return the new (cloned) buffer */
+	bf->bf_m = NULL;
+	bf->bf_node = NULL;
+	ath_freebuf(sc, bf);
+	return nbf;
+}
+
+/*
+ * Handle retrying an unaggregate frame in an aggregate
+ * session.
+ *
+ * If too many retries occur, the intent is to pause the TID, wait
+ * for any further retransmits (as there's no reason why
+ * non-aggregate frames in an aggregate session are
+ * transmitted in-order; they just have to be in-BAW)
+ * and then queue a BAR.  Neither the pause nor the BAR is done by
+ * the code below yet (see the XXX TODO); the frame is dropped from
+ * the BAW and completed instead.
+ *
+ * Otherwise the frame is marked as a retry, put back at the head of
+ * the TID's software queue and the TID is scheduled.
+ *
+ * This function takes and releases the TXQ lock itself.
+ */
+static void
+ath_tx_aggr_retry_unaggr(struct ath_softc *sc, struct ath_buf *bf)
+{
+	struct ieee80211_node *ni = bf->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	int tid = bf->bf_state.bfs_tid;
+	struct ath_tid *atid = &an->an_tid[tid];
+	struct ieee80211_tx_ampdu *tap;
+	int txseq;
+
+	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
+
+	tap = ath_tx_get_tx_tid(an, tid);
+
+	/*
+	 * If the buffer is marked as busy, we can't directly
+	 * reuse it. Instead, try to clone the buffer.
+	 * If the clone is successful, recycle the old buffer.
+	 * If the clone is unsuccessful, set bfs_retries to max
+	 * to force the next bit of code to free the buffer
+	 * for us.
+	 */
+	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
+	    (bf->bf_flags & ATH_BUF_BUSY)) {
+		struct ath_buf *nbf;
+		nbf = ath_tx_retry_clone(sc, bf);
+		if (nbf)
+			/* bf has been freed at this point */
+			bf = nbf;
+		else
+			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
+	}
+
+	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
+		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
+		    "%s: exceeded retries; seqno %d\n",
+		    __func__, SEQNO(bf->bf_state.bfs_seqno));
+		sc->sc_stats.ast_tx_swretrymax++;
+
+		/* Update BAW anyway */
+		if (bf->bf_state.bfs_dobaw) {
+			ath_tx_update_baw(sc, an, atid, bf);
+			if (! bf->bf_state.bfs_addedbaw)
+				device_printf(sc->sc_dev,
+				    "%s: wasn't added: seqno %d\n",
+				    __func__, SEQNO(bf->bf_state.bfs_seqno));
+		}
+		bf->bf_state.bfs_dobaw = 0;
+
+		/* Send BAR frame */
+		/*
+		 * This'll end up going into net80211 and back out
+		 * again, via ic->ic_raw_xmit().
+		 * The window start is sampled before the lock is dropped.
+		 */
+		txseq = tap->txa_start;
+		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+
+		device_printf(sc->sc_dev,
+		    "%s: TID %d: send BAR; seq %d\n", __func__, tid, txseq);
+
+		/* XXX TODO: send BAR */
+
+		/* Free buffer, bf is free after this call */
+		ath_tx_default_comp(sc, bf, 0);
+		return;
+	}
+
+	/*
+	 * This increments the retry counter as well as
+	 * sets the retry flag in the ath_buf and packet
+	 * body.
+	 */
+	ath_tx_set_retry(sc, bf);
+
+	/*
+	 * Insert this at the head of the queue, so it's
+	 * retried before any current/subsequent frames.
+	 */
+	ATH_TXQ_INSERT_HEAD(atid, bf, bf_list);
+	ath_tx_tid_sched(sc, atid);
+
+	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
+}
+
+/*
+ * Common code for aggregate excessive retry/subframe retry.
+ *
+ * Returns 0 if the subframe is to be retried, in which case the
+ * buffer (or a clone of it, if the original was busy) has been
+ * appended to bf_q.  Returns 1 if the retry limit was reached: the
+ * frame has been removed from the BAW but is NOT freed or queued
+ * here, so the caller has to complete it.
+ *
+ * The TXQ lock must be held (asserted below).
+ *
+ * XXX should unify this with ath_tx_aggr_retry_unaggr()
+ */
+static int
+ath_tx_retry_subframe(struct ath_softc *sc, struct ath_buf *bf,
+    ath_bufhead *bf_q)
+{
+	struct ieee80211_node *ni = bf->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	int tid = bf->bf_state.bfs_tid;
+	struct ath_tid *atid = &an->an_tid[tid];
+
+	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[atid->ac]);
+
+	ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
+	ath_hal_set11nburstduration(sc->sc_ah, bf->bf_desc, 0);
+	/* ath_hal_set11n_virtualmorefrag(sc->sc_ah, bf->bf_desc, 0); */
+
+	/*
+	 * If the buffer is marked as busy, we can't directly
+	 * reuse it. Instead, try to clone the buffer.
+	 * If the clone is successful, recycle the old buffer.
+	 * If the clone is unsuccessful, set bfs_retries to max
+	 * to force the next bit of code to free the buffer
+	 * for us.
+	 */
+	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
+	    (bf->bf_flags & ATH_BUF_BUSY)) {
+		struct ath_buf *nbf;
+		nbf = ath_tx_retry_clone(sc, bf);
+		if (nbf)
+			/* bf has been freed at this point */
+			bf = nbf;
+		else
+			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
+	}
+
+	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
+		sc->sc_stats.ast_tx_swretrymax++;
+		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
+		    "%s: max retries: seqno %d\n",
+		    __func__, SEQNO(bf->bf_state.bfs_seqno));
+		ath_tx_update_baw(sc, an, atid, bf);	/* NOTE(review): not guarded by bfs_dobaw, unlike the unaggr path */
+		if (! bf->bf_state.bfs_addedbaw)
+			device_printf(sc->sc_dev,
+			    "%s: wasn't added: seqno %d\n",
+			    __func__, SEQNO(bf->bf_state.bfs_seqno));
+		bf->bf_state.bfs_dobaw = 0;
+		return 1;
+	}
+
+	ath_tx_set_retry(sc, bf);
+	bf->bf_next = NULL;		/* Just to make sure */
+
+	TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
+	return 0;
+}
+
+/*
+ * error pkt completion for an aggregate destination
+ *
+ * Every subframe chained from bf_first (via bf_next) is reported to
+ * rate control as failed and then run through
+ * ath_tx_retry_subframe().  Subframes to be retried are put back at
+ * the head of the TID's software queue in their original order;
+ * subframes that hit the retry limit are completed through the
+ * default completion handler after the TXQ lock has been dropped.
+ *
+ * This function takes and releases the TXQ lock itself.
+ */
+static void
+ath_tx_comp_aggr_error(struct ath_softc *sc, struct ath_buf *bf_first,
+    struct ath_tid *tid)
+{
+	struct ieee80211_node *ni = bf_first->bf_node;
+	struct ath_node *an = ATH_NODE(ni);
+	struct ath_buf *bf_next, *bf;
+	ath_bufhead bf_q;	/* subframes to be retried */
+	int drops = 0;
+	struct ieee80211_tx_ampdu *tap;
+	ath_bufhead bf_cq;	/* subframes that exhausted their retries */
+
+	TAILQ_INIT(&bf_q);
+	TAILQ_INIT(&bf_cq);
+	sc->sc_stats.ast_tx_aggrfail++;
+
+	/*
+	 * Update rate control - all frames have failed.
+	 *
+	 * XXX use the length in the first frame in the series;
+	 * XXX just so things are consistent for now.
+	 */
+	ath_tx_update_ratectrl(sc, ni, bf_first->bf_state.bfs_rc,
+	    &bf_first->bf_status.ds_txstat,
+	    bf_first->bf_state.bfs_pktlen,
+	    bf_first->bf_state.bfs_nframes, bf_first->bf_state.bfs_nframes);
+
+	ATH_TXQ_LOCK(sc->sc_ac2q[tid->ac]);
+	tap = ath_tx_get_tx_tid(an, tid->tid);
+
+	/* Retry all subframes */
+	bf = bf_first;
+	while (bf) {
+		bf_next = bf->bf_next;
+		bf->bf_next = NULL;	/* Remove it from the aggr list */
+		if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
+			drops++;
+			bf->bf_next = NULL;
+			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
+		}
+		bf = bf_next;
+	}
+
+	/*
+	 * Prepend all frames to the beginning of the queue.
+	 * Walking bf_q from its tail while inserting at the head
+	 * keeps the frames in their original relative order.
+	 */
+	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
+		TAILQ_REMOVE(&bf_q, bf, bf_list);
+		ATH_TXQ_INSERT_HEAD(tid, bf, bf_list);
+	}
+
+	ath_tx_tid_sched(sc, tid);
+
+	/*
+	 * send bar if we dropped any frames
+	 *
+	 * Keep the txq lock held for now, as we need to ensure
+	 * that ni_txseqs[] is consistent (as it's being updated
+	 * in the ifnet TX context or raw TX context.)
+	 */
+	if (drops) {
+		int txseq = tap->txa_start;
+		ATH_TXQ_UNLOCK(sc->sc_ac2q[tid->ac]);
+		device_printf(sc->sc_dev,
+		    "%s: TID %d: send BAR; seq %d\n",
+		    __func__, tid->tid, txseq);
+
+		/* XXX TODO: send BAR */
+	} else {
+		ATH_TXQ_UNLOCK(sc->sc_ac2q[tid->ac]);
+	}
+
+	/* Complete frames which errored out */
+	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
+		TAILQ_REMOVE(&bf_cq, bf, bf_list);
+		ath_tx_default_comp(sc, bf, 0);
+	}
+}
+
+/*
+ * Handle clean-up of packets from an aggregate list.
+ *
+ * There's no need to update the BAW here - the session is being
+ * torn down.
+ */ +static void +ath_tx_comp_cleanup_aggr(struct ath_softc *sc, struct ath_buf *bf_first) +{ + struct ath_buf *bf, *bf_next; + struct ieee80211_node *ni = bf_first->bf_node; + struct ath_node *an = ATH_NODE(ni); + int tid = bf_first->bf_state.bfs_tid; + struct ath_tid *atid = &an->an_tid[tid]; + + bf = bf_first; + + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + + /* update incomp */ + while (bf) { + atid->incomp--; + bf = bf->bf_next; + } + + if (atid->incomp == 0) { + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: TID %d: cleaned up! resume!\n", + __func__, tid); + atid->cleanup_inprogress = 0; + ath_tx_tid_resume(sc, atid); + } + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + + /* Handle frame completion */ + while (bf) { + bf_next = bf->bf_next; + ath_tx_default_comp(sc, bf, 1); + bf = bf_next; + } +} + +/* + * Handle completion of an set of aggregate frames. + * + * XXX for now, simply complete each sub-frame. + * + * Note: the completion handler is the last descriptor in the aggregate, + * not the last descriptor in the first frame. + */ +static void +ath_tx_aggr_comp_aggr(struct ath_softc *sc, struct ath_buf *bf_first, int fail) +{ + //struct ath_desc *ds = bf->bf_lastds; + struct ieee80211_node *ni = bf_first->bf_node; + struct ath_node *an = ATH_NODE(ni); + int tid = bf_first->bf_state.bfs_tid; + struct ath_tid *atid = &an->an_tid[tid]; + struct ath_tx_status ts; + struct ieee80211_tx_ampdu *tap; + ath_bufhead bf_q; + ath_bufhead bf_cq; + int seq_st, tx_ok; + int hasba, isaggr; + uint32_t ba[2]; + struct ath_buf *bf, *bf_next; + int ba_index; + int drops = 0; + int nframes = 0, nbad = 0, nf; + int pktlen; + /* XXX there's too much on the stack? 
*/ + struct ath_rc_series rc[4]; + int txseq; + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called; hwq_depth=%d\n", + __func__, atid->hwq_depth); + + /* The TID state is kept behind the TXQ lock */ + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + + atid->hwq_depth--; + if (atid->hwq_depth < 0) + device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n", + __func__, atid->hwq_depth); + + /* + * Punt cleanup to the relevant function, not our problem now + */ + if (atid->cleanup_inprogress) { + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + ath_tx_comp_cleanup_aggr(sc, bf_first); + return; + } + + /* + * Take a copy; this may be needed -after- bf_first + * has been completed and freed. + */ + ts = bf_first->bf_status.ds_txstat; + /* + * XXX for now, use the first frame in the aggregate for + * XXX rate control completion; it's at least consistent. + */ + pktlen = bf_first->bf_state.bfs_pktlen; + + /* + * handle errors first + */ + if (ts.ts_status & HAL_TXERR_XRETRY) { + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + ath_tx_comp_aggr_error(sc, bf_first, atid); + return; + } + + TAILQ_INIT(&bf_q); + TAILQ_INIT(&bf_cq); + tap = ath_tx_get_tx_tid(an, tid); + + /* + * extract starting sequence and block-ack bitmap + */ + /* XXX endian-ness of seq_st, ba? */ + seq_st = ts.ts_seqnum; + hasba = !! (ts.ts_flags & HAL_TX_BA); + tx_ok = (ts.ts_status == 0); + isaggr = bf_first->bf_state.bfs_aggr; + ba[0] = ts.ts_ba_low; + ba[1] = ts.ts_ba_high; + + /* + * Copy the TX completion status and the rate control + * series from the first descriptor, as it may be freed + * before the rate control code can get its grubby fingers + * into things. + */ + memcpy(rc, bf_first->bf_state.bfs_rc, sizeof(rc)); + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: txa_start=%d, tx_ok=%d, status=%.8x, flags=%.8x, isaggr=%d, seq_st=%d, hasba=%d, ba=%.8x, %.8x\n", + __func__, tap->txa_start, tx_ok, ts.ts_status, ts.ts_flags, + isaggr, seq_st, hasba, ba[0], ba[1]); + + /* Occasionally, the MAC sends a tx status for the wrong TID. 
*/ + if (tid != ts.ts_tid) { + device_printf(sc->sc_dev, "%s: tid %d != hw tid %d\n", + __func__, tid, ts.ts_tid); + tx_ok = 0; + } + + /* AR5416 BA bug; this requires an interface reset */ + if (isaggr && tx_ok && (! hasba)) { + device_printf(sc->sc_dev, + "%s: AR5416 bug: hasba=%d; txok=%d, isaggr=%d, seq_st=%d\n", + __func__, hasba, tx_ok, isaggr, seq_st); + /* XXX TODO: schedule an interface reset */ + } + + /* + * Walk the list of frames, figure out which ones were correctly + * sent and which weren't. + */ + bf = bf_first; + nf = bf_first->bf_state.bfs_nframes; + + /* bf_first is going to be invalid once this list is walked */ + bf_first = NULL; + + /* + * Walk the list of completed frames and determine + * which need to be completed and which need to be + * retransmitted. + * + * For completed frames, the completion functions need + * to be called at the end of this function as the last + * node reference may free the node. + * + * Finally, since the TXQ lock can't be held during the + * completion callback (to avoid lock recursion), + * the completion calls have to be done outside of the + * lock. + */ + while (bf) { + nframes++; + ba_index = ATH_BA_INDEX(seq_st, SEQNO(bf->bf_state.bfs_seqno)); + bf_next = bf->bf_next; + bf->bf_next = NULL; /* Remove it from the aggr list */ + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: checking bf=%p seqno=%d; ack=%d\n", + __func__, bf, SEQNO(bf->bf_state.bfs_seqno), + ATH_BA_ISSET(ba, ba_index)); + + if (tx_ok && ATH_BA_ISSET(ba, ba_index)) { + ath_tx_update_baw(sc, an, atid, bf); + bf->bf_state.bfs_dobaw = 0; + if (! 
bf->bf_state.bfs_addedbaw) + device_printf(sc->sc_dev, + "%s: wasn't added: seqno %d\n", + __func__, SEQNO(bf->bf_state.bfs_seqno)); + bf->bf_next = NULL; + TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list); + } else { + if (ath_tx_retry_subframe(sc, bf, &bf_q)) { + drops++; + bf->bf_next = NULL; + TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list); + } + nbad++; + } + bf = bf_next; + } + + /* + * Now that the BAW updates have been done, unlock + * + * txseq is grabbed before the lock is released so we + * have a consistent view of what -was- in the BAW. + * Anything after this point will not yet have been + * TXed. + */ + txseq = tap->txa_start; + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + + if (nframes != nf) + device_printf(sc->sc_dev, + "%s: num frames seen=%d; bf nframes=%d\n", + __func__, nframes, nf); + + /* + * Now we know how many frames were bad, call the rate + * control code. + */ + if (fail == 0) + ath_tx_update_ratectrl(sc, ni, rc, &ts, pktlen, nframes, nbad); + + /* + * send bar if we dropped any frames + */ + if (drops) { + device_printf(sc->sc_dev, + "%s: TID %d: send BAR; seq %d\n", __func__, tid, txseq); + /* XXX TODO: send BAR */ + } + + /* Prepend all frames to the beginning of the queue */ + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) { + TAILQ_REMOVE(&bf_q, bf, bf_list); + ATH_TXQ_INSERT_HEAD(atid, bf, bf_list); + } + ath_tx_tid_sched(sc, atid); + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: txa_start now %d\n", __func__, tap->txa_start); + + /* Do deferred completion */ + while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) { + TAILQ_REMOVE(&bf_cq, bf, bf_list); + ath_tx_default_comp(sc, bf, 0); + } +} + +/* + * Handle completion of unaggregated frames in an ADDBA + * session. + * + * Fail is set to 1 if the entry is being freed via a call to + * ath_tx_draintxq(). 
+ */ +static void +ath_tx_aggr_comp_unaggr(struct ath_softc *sc, struct ath_buf *bf, int fail) +{ + struct ieee80211_node *ni = bf->bf_node; + struct ath_node *an = ATH_NODE(ni); + int tid = bf->bf_state.bfs_tid; + struct ath_tid *atid = &an->an_tid[tid]; + struct ath_tx_status *ts = &bf->bf_status.ds_txstat; + + /* + * Update rate control status here, before we possibly + * punt to retry or cleanup. + * + * Do it outside of the TXQ lock. + */ + if (fail == 0 && ((bf->bf_txflags & HAL_TXDESC_NOACK) == 0)) + ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc, + &bf->bf_status.ds_txstat, + bf->bf_state.bfs_pktlen, + 1, (ts->ts_status == 0) ? 0 : 1); + + /* + * This is called early so atid->hwq_depth can be tracked. + * This unfortunately means that it's released and regrabbed + * during retry and cleanup. That's rather inefficient. + */ + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + + if (tid == IEEE80211_NONQOS_TID) + device_printf(sc->sc_dev, "%s: TID=16!\n", __func__); + + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: tid=%d, hwq_depth=%d\n", + __func__, bf, bf->bf_state.bfs_tid, atid->hwq_depth); + + atid->hwq_depth--; + if (atid->hwq_depth < 0) + device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n", + __func__, atid->hwq_depth); + + /* + * If a cleanup is in progress, punt to comp_cleanup; + * rather than handling it here. It's thus their + * responsibility to clean up, call the completion + * function in net80211, etc. + */ + if (atid->cleanup_inprogress) { + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + ath_tx_comp_cleanup_unaggr(sc, bf); + return; + } + + /* + * Don't bother with the retry check if all frames + * are being failed (eg during queue deletion.) + */ + if (fail == 0 && ts->ts_status & HAL_TXERR_XRETRY) { + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + ath_tx_aggr_retry_unaggr(sc, bf); + return; + } + + /* Success? 
Complete */ + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=%d, seqno %d\n", + __func__, tid, SEQNO(bf->bf_state.bfs_seqno)); + if (bf->bf_state.bfs_dobaw) { + ath_tx_update_baw(sc, an, atid, bf); + bf->bf_state.bfs_dobaw = 0; + if (! bf->bf_state.bfs_addedbaw) + device_printf(sc->sc_dev, + "%s: wasn't added: seqno %d\n", + __func__, SEQNO(bf->bf_state.bfs_seqno)); + } + + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + + ath_tx_default_comp(sc, bf, fail); + /* bf is freed at this point */ +} + +void +ath_tx_aggr_comp(struct ath_softc *sc, struct ath_buf *bf, int fail) +{ + if (bf->bf_state.bfs_aggr) + ath_tx_aggr_comp_aggr(sc, bf, fail); + else + ath_tx_aggr_comp_unaggr(sc, bf, fail); +} + +/* + * Schedule some packets from the given node/TID to the hardware. + * + * This is the aggregate version. + */ +void +ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid) +{ + struct ath_buf *bf; + struct ath_txq *txq = sc->sc_ac2q[tid->ac]; + struct ieee80211_tx_ampdu *tap; + struct ieee80211_node *ni = &an->an_node; + ATH_AGGR_STATUS status; + ath_bufhead bf_q; + + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d\n", __func__, tid->tid); + ATH_TXQ_LOCK_ASSERT(txq); + + tap = ath_tx_get_tx_tid(an, tid->tid); + + if (tid->tid == IEEE80211_NONQOS_TID) + device_printf(sc->sc_dev, "%s: called for TID=NONQOS_TID?\n", + __func__); + + for (;;) { + status = ATH_AGGR_DONE; + + /* + * If the upper layer has paused the TID, don't + * queue any further packets. + * + * This can also occur from the completion task because + * of packet loss; but as its serialised with this code, + * it won't "appear" half way through queuing packets. + */ + if (tid->paused) + break; + + bf = TAILQ_FIRST(&tid->axq_q); + if (bf == NULL) { + break; + } + + /* + * If the packet doesn't fall within the BAW (eg a NULL + * data frame), schedule it directly; continue. + */ + if (! 
bf->bf_state.bfs_dobaw) { + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: non-baw packet\n", + __func__); + ATH_TXQ_REMOVE(tid, bf, bf_list); + bf->bf_state.bfs_aggr = 0; + ath_tx_do_ratelookup(sc, bf); + ath_tx_rate_fill_rcflags(sc, bf); + ath_tx_set_rtscts(sc, bf); + ath_tx_setds(sc, bf); + ath_tx_chaindesclist(sc, bf); + ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc); + ath_tx_set_ratectrl(sc, ni, bf); + + sc->sc_aggr_stats.aggr_nonbaw_pkt++; + + /* Queue the packet; continue */ + goto queuepkt; + } + + TAILQ_INIT(&bf_q); + + /* + * Do a rate control lookup on the first frame in the + * list. The rate control code needs that to occur + * before it can determine whether to TX. + * It's inaccurate because the rate control code doesn't + * really "do" aggregate lookups, so it only considers + * the size of the first frame. + */ + ath_tx_do_ratelookup(sc, bf); + bf->bf_state.bfs_rc[3].rix = 0; + bf->bf_state.bfs_rc[3].tries = 0; + ath_tx_rate_fill_rcflags(sc, bf); + + status = ath_tx_form_aggr(sc, an, tid, &bf_q); + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: ath_tx_form_aggr() status=%d\n", __func__, status); + + /* + * No frames to be picked up - out of BAW + */ + if (TAILQ_EMPTY(&bf_q)) + break; + + /* + * This assumes that the descriptor list in the ath_bufhead + * are already linked together via bf_next pointers. + */ + bf = TAILQ_FIRST(&bf_q); + + /* + * If it's the only frame send as non-aggregate + * assume that ath_tx_form_aggr() has checked + * whether it's in the BAW and added it appropriately. 
+ */ + if (bf->bf_state.bfs_nframes == 1) { + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: single-frame aggregate\n", __func__); + bf->bf_state.bfs_aggr = 0; + ath_tx_set_rtscts(sc, bf); + ath_tx_setds(sc, bf); + ath_tx_chaindesclist(sc, bf); + ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc); + ath_tx_set_ratectrl(sc, ni, bf); + if (status == ATH_AGGR_BAW_CLOSED) + sc->sc_aggr_stats.aggr_baw_closed_single_pkt++; + else + sc->sc_aggr_stats.aggr_single_pkt++; + } else { + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: multi-frame aggregate: %d frames, length %d\n", + __func__, bf->bf_state.bfs_nframes, + bf->bf_state.bfs_al); + bf->bf_state.bfs_aggr = 1; + sc->sc_aggr_stats.aggr_pkts[bf->bf_state.bfs_nframes]++; + sc->sc_aggr_stats.aggr_aggr_pkt++; + + /* + * Update the rate and rtscts information based on the + * rate decision made by the rate control code; + * the first frame in the aggregate needs it. + */ + ath_tx_set_rtscts(sc, bf); + + /* + * Setup the relevant descriptor fields + * for aggregation. The first descriptor + * already points to the rest in the chain. + */ + ath_tx_setds_11n(sc, bf); + + /* + * setup first desc with rate and aggr info + */ + ath_tx_set_ratectrl(sc, ni, bf); + } + queuepkt: + //txq = bf->bf_state.bfs_txq; + + /* Set completion handler, multi-frame aggregate or not */ + bf->bf_comp = ath_tx_aggr_comp; + + if (bf->bf_state.bfs_tid == IEEE80211_NONQOS_TID) + device_printf(sc->sc_dev, "%s: TID=16?\n", __func__); + + /* Punt to txq */ + ath_tx_handoff(sc, txq, bf); + + /* Track outstanding buffer count to hardware */ + /* aggregates are "one" buffer */ + tid->hwq_depth++; + + /* + * Break out if ath_tx_form_aggr() indicated + * there can't be any further progress (eg BAW is full.) + * Checking for an empty txq is done above. + * + * XXX locking on txq here? + */ + if (txq->axq_aggr_depth >= sc->sc_hwq_limit || + status == ATH_AGGR_BAW_CLOSED) + break; + } +} + +/* + * Schedule some packets from the given node/TID to the hardware. 
+ */ +void +ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid) +{ + struct ath_buf *bf; + struct ath_txq *txq = sc->sc_ac2q[tid->ac]; + struct ieee80211_node *ni = &an->an_node; + + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: node %p: TID %d: called\n", + __func__, an, tid->tid); + + ATH_TXQ_LOCK_ASSERT(txq); + + /* Check - is AMPDU pending or running? then print out something */ + if (ath_tx_ampdu_pending(sc, an, tid->tid)) + device_printf(sc->sc_dev, "%s: tid=%d, ampdu pending?\n", + __func__, tid->tid); + if (ath_tx_ampdu_running(sc, an, tid->tid)) + device_printf(sc->sc_dev, "%s: tid=%d, ampdu running?\n", + __func__, tid->tid); + + for (;;) { + + /* + * If the upper layers have paused the TID, don't + * queue any further packets. + */ + if (tid->paused) + break; + + bf = TAILQ_FIRST(&tid->axq_q); + if (bf == NULL) { + break; + } + + ATH_TXQ_REMOVE(tid, bf, bf_list); + + KASSERT(txq == bf->bf_state.bfs_txq, ("txqs not equal!\n")); + + /* Sanity check! */ + if (tid->tid != bf->bf_state.bfs_tid) { + device_printf(sc->sc_dev, "%s: bfs_tid %d !=" + " tid %d\n", + __func__, bf->bf_state.bfs_tid, tid->tid); + } + /* Normal completion handler */ + bf->bf_comp = ath_tx_normal_comp; + + /* Program descriptors + rate control */ + ath_tx_do_ratelookup(sc, bf); + ath_tx_rate_fill_rcflags(sc, bf); + ath_tx_set_rtscts(sc, bf); + ath_tx_setds(sc, bf); + ath_tx_chaindesclist(sc, bf); + ath_tx_set_ratectrl(sc, ni, bf); + + /* Track outstanding buffer count to hardware */ + /* aggregates are "one" buffer */ + tid->hwq_depth++; + + /* Punt to hardware or software txq */ + ath_tx_handoff(sc, txq, bf); + } +} + +/* + * Schedule some packets to the given hardware queue. + * + * This function walks the list of TIDs (ie, ath_node TIDs + * with queued traffic) and attempts to schedule traffic + * from them. + * + * TID scheduling is implemented as a FIFO, with TIDs being + * added to the end of the queue after some frames have been + * scheduled. 
+ */ +void +ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq) +{ + struct ath_tid *tid, *next, *last; + + ATH_TXQ_LOCK_ASSERT(txq); + + /* + * Don't schedule if the hardware queue is busy. + * This (hopefully) gives some more time to aggregate + * some packets in the aggregation queue. + */ + if (txq->axq_aggr_depth >= sc->sc_hwq_limit) { + sc->sc_aggr_stats.aggr_sched_nopkt++; + return; + } + + last = TAILQ_LAST(&txq->axq_tidq, axq_t_s); + + TAILQ_FOREACH_SAFE(tid, &txq->axq_tidq, axq_qelem, next) { + /* + * Suspend paused queues here; they'll be resumed + * once the addba completes or times out. + */ + DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, paused=%d\n", + __func__, tid->tid, tid->paused); + ath_tx_tid_unsched(sc, tid); + if (tid->paused) { + continue; + } + if (ath_tx_ampdu_running(sc, tid->an, tid->tid)) + ath_tx_tid_hw_queue_aggr(sc, tid->an, tid); + else + ath_tx_tid_hw_queue_norm(sc, tid->an, tid); + + /* Not empty? Re-schedule */ + if (tid->axq_depth != 0) + ath_tx_tid_sched(sc, tid); + + /* Give the software queue time to aggregate more packets */ + if (txq->axq_aggr_depth >= sc->sc_hwq_limit) { + break; + } + + /* + * If this was the last entry on the original list, stop. + * Otherwise nodes that have been rescheduled onto the end + * of the TID FIFO list will just keep being rescheduled. + */ + if (tid == last) + break; + } +} + +/* + * TX addba handling + */ + +/* + * Return net80211 TID struct pointer, or NULL for none + */ +struct ieee80211_tx_ampdu * +ath_tx_get_tx_tid(struct ath_node *an, int tid) +{ + struct ieee80211_node *ni = &an->an_node; + struct ieee80211_tx_ampdu *tap; + int ac; + + if (tid == IEEE80211_NONQOS_TID) + return NULL; + + ac = TID_TO_WME_AC(tid); + + tap = &ni->ni_tx_ampdu[ac]; + return tap; +} + +/* + * Is AMPDU-TX running? 
+ */ +static int +ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an, int tid) +{ + struct ieee80211_tx_ampdu *tap; + + if (tid == IEEE80211_NONQOS_TID) + return 0; + + tap = ath_tx_get_tx_tid(an, tid); + if (tap == NULL) + return 0; /* Not valid; default to not running */ + + return !! (tap->txa_flags & IEEE80211_AGGR_RUNNING); +} + +/* + * Is AMPDU-TX negotiation pending? + */ +static int +ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an, int tid) +{ + struct ieee80211_tx_ampdu *tap; + + if (tid == IEEE80211_NONQOS_TID) + return 0; + + tap = ath_tx_get_tx_tid(an, tid); + if (tap == NULL) + return 0; /* Not valid; default to not pending */ + + return !! (tap->txa_flags & IEEE80211_AGGR_XCHGPEND); +} + +/* + * Is AMPDU-TX pending for the given TID? + */ + + +/* + * Method to handle sending an ADDBA request. + * + * We tap this so the relevant flags can be set to pause the TID + * whilst waiting for the response. + * + * XXX there's no timeout handler we can override? + */ +int +ath_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, + int dialogtoken, int baparamset, int batimeout) +{ + struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc; + int tid = WME_AC_TO_TID(tap->txa_ac); + struct ath_node *an = ATH_NODE(ni); + struct ath_tid *atid = &an->an_tid[tid]; + + /* + * XXX danger Will Robinson! + * + * Although the taskqueue may be running and scheduling some more + * packets, these should all be _before_ the addba sequence number. + * However, net80211 will keep self-assigning sequence numbers + * until addba has been negotiated. + * + * In the past, these packets would be "paused" (which still works + * fine, as they're being scheduled to the driver in the same + * serialised method which is calling the addba request routine) + * and when the aggregation session begins, they'll be dequeued + * as aggregate packets and added to the BAW. 
However, now there's + * a "bf->bf_state.bfs_dobaw" flag, and this isn't set for these + * packets. Thus they never get included in the BAW tracking and + * this can cause the initial burst of packets after the addba + * negotiation to "hang", as they quickly fall outside the BAW. + * + * The "eventual" solution should be to tag these packets with + * dobaw. Although net80211 has given us a sequence number, + * it'll be "after" the left edge of the BAW and thus it'll + * fall within it. + */ + ath_tx_tid_pause(sc, atid); + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: called; dialogtoken=%d, baparamset=%d, batimeout=%d\n", + __func__, dialogtoken, baparamset, batimeout); + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: txa_start=%d, ni_txseqs=%d\n", + __func__, tap->txa_start, ni->ni_txseqs[tid]); + + return sc->sc_addba_request(ni, tap, dialogtoken, baparamset, + batimeout); +} + +/* + * Handle an ADDBA response. + * + * We unpause the queue so TX'ing can resume. + * + * Any packets TX'ed from this point should be "aggregate" (whether + * aggregate or not) so the BAW is updated. + * + * Note! net80211 keeps self-assigning sequence numbers until + * ampdu is negotiated. This means the initially-negotiated BAW left + * edge won't match the ni->ni_txseq. + * + * So, being very dirty, the BAW left edge is "slid" here to match + * ni->ni_txseq. + * + * What likely SHOULD happen is that all packets subsequent to the + * addba request should be tagged as aggregate and queued as non-aggregate + * frames; thus updating the BAW. For now though, I'll just slide the + * window. 
+ */ +int +ath_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, + int status, int code, int batimeout) +{ + struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc; + int tid = WME_AC_TO_TID(tap->txa_ac); + struct ath_node *an = ATH_NODE(ni); + struct ath_tid *atid = &an->an_tid[tid]; + int r; + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: called; status=%d, code=%d, batimeout=%d\n", __func__, + status, code, batimeout); + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: txa_start=%d, ni_txseqs=%d\n", + __func__, tap->txa_start, ni->ni_txseqs[tid]); + + /* + * Call this first, so the interface flags get updated + * before the TID is unpaused. Otherwise a race condition + * exists where the unpaused TID still doesn't yet have + * IEEE80211_AGGR_RUNNING set. + */ + r = sc->sc_addba_response(ni, tap, status, code, batimeout); + + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + /* + * XXX dirty! + * Slide the BAW left edge to wherever net80211 left it for us. + * Read above for more information. + */ + tap->txa_start = ni->ni_txseqs[tid]; + ath_tx_tid_resume(sc, atid); + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + return r; +} + + +/* + * Stop ADDBA on a queue. + */ +void +ath_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) +{ + struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc; + int tid = WME_AC_TO_TID(tap->txa_ac); + struct ath_node *an = ATH_NODE(ni); + struct ath_tid *atid = &an->an_tid[tid]; + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: called\n", __func__); + + /* Pause TID traffic early, so there aren't any races */ + ath_tx_tid_pause(sc, atid); + + /* There's no need to hold the TXQ lock here */ + sc->sc_addba_stop(ni, tap); + + /* + * ath_tx_cleanup will resume the TID if possible, otherwise + * it'll set the cleanup flag, and it'll be unpaused once + * things have been cleaned up. 
+ */ + ath_tx_cleanup(sc, an, tid); +} + +/* + * Note: net80211 bar_timeout() doesn't call this function on BAR failure; + * it simply tears down the aggregation session. Ew. + * + * It however will call ieee80211_ampdu_stop() which will call + * ic->ic_addba_stop(). + * + * XXX This uses a hard-coded max BAR count value; the whole + * XXX BAR TX success or failure should be better handled! + */ +void +ath_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, + int status) +{ + struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc; + int tid = WME_AC_TO_TID(tap->txa_ac); + struct ath_node *an = ATH_NODE(ni); + struct ath_tid *atid = &an->an_tid[tid]; + int attempts = tap->txa_attempts; + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: called; status=%d\n", __func__, status); + + /* Note: This may update the BAW details */ + sc->sc_bar_response(ni, tap, status); + + /* Unpause the TID */ + /* + * XXX if this is attempt=50, the TID will be downgraded + * XXX to a non-aggregate session. So we must unpause the + * XXX TID here or it'll never be done. + */ + if (status == 0 || attempts == 50) { + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + ath_tx_tid_resume(sc, atid); + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); + } +} + +/* + * This is called whenever the pending ADDBA request times out. + * Unpause and reschedule the TID. 
+ */ +void +ath_addba_response_timeout(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap) +{ + struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc; + int tid = WME_AC_TO_TID(tap->txa_ac); + struct ath_node *an = ATH_NODE(ni); + struct ath_tid *atid = &an->an_tid[tid]; + + DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, + "%s: called; resuming\n", __func__); + + /* Note: This updates the aggregate state to (again) pending */ + sc->sc_addba_response_timeout(ni, tap); + + /* Unpause the TID; which reschedules it */ + ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]); + ath_tx_tid_resume(sc, atid); + ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]); +} diff --git a/sys/dev/ath/if_ath_tx.h b/sys/dev/ath/if_ath_tx.h index c66f61b..958acf9 100644 --- a/sys/dev/ath/if_ath_tx.h +++ b/sys/dev/ath/if_ath_tx.h @@ -32,6 +32,18 @@ #define __IF_ATH_TX_H__ /* + * some general macros + */ +#define INCR(_l, _sz) (_l) ++; (_l) &= ((_sz) - 1) +/* + * return block-ack bitmap index given sequence and starting sequence + */ +#define ATH_BA_INDEX(_st, _seq) (((_seq) - (_st)) & (IEEE80211_SEQ_RANGE - 1)) + +#define WME_BA_BMP_SIZE 64 +#define WME_MAX_BA WME_BA_BMP_SIZE + +/* * How 'busy' to try and keep the hardware txq */ #define ATH_AGGR_MIN_QDEPTH 2 @@ -49,7 +61,28 @@ #define ATH_AGGR_SCHED_HIGH 4 #define ATH_AGGR_SCHED_LOW 2 +/* + * return whether a bit at index _n in bitmap _bm is set + * _sz is the size of the bitmap + */ +#define ATH_BA_ISSET(_bm, _n) (((_n) < (WME_BA_BMP_SIZE)) && \ + ((_bm)[(_n) >> 5] & (1 << ((_n) & 31)))) + + +/* extracting the seqno from buffer seqno */ +#define SEQNO(_a) ((_a) >> IEEE80211_SEQ_SEQ_SHIFT) + +/* + * Whether the current sequence number is within the + * BAW. 
+ */ +#define BAW_WITHIN(_start, _bawsz, _seqno) \ + ((((_seqno) - (_start)) & 4095) < (_bawsz)) + +extern void ath_txq_restart_dma(struct ath_softc *sc, struct ath_txq *txq); extern void ath_freetx(struct mbuf *m); +extern void ath_tx_node_flush(struct ath_softc *sc, struct ath_node *an); +extern void ath_tx_txq_drain(struct ath_softc *sc, struct ath_txq *txq); extern void ath_txfrag_cleanup(struct ath_softc *sc, ath_bufhead *frags, struct ieee80211_node *ni); extern int ath_txfrag_setup(struct ath_softc *sc, ath_bufhead *frags, @@ -59,4 +92,36 @@ extern int ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni, extern int ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params); +/* software queue stuff */ +extern void ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_txq *txq, struct ath_buf *bf); +extern void ath_tx_tid_init(struct ath_softc *sc, struct ath_node *an); +extern void ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid); +extern void ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid); +extern void ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq); +extern void ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, + int fail); +extern void ath_tx_aggr_comp(struct ath_softc *sc, struct ath_buf *bf, + int fail); +extern void ath_tx_addto_baw(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid, struct ath_buf *bf); +extern struct ieee80211_tx_ampdu * ath_tx_get_tx_tid(struct ath_node *an, + int tid); + +/* TX addba handling */ +extern int ath_addba_request(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap, int dialogtoken, + int baparamset, int batimeout); +extern int ath_addba_response(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap, int dialogtoken, + int code, int batimeout); +extern void ath_addba_stop(struct ieee80211_node *ni, + struct 
ieee80211_tx_ampdu *tap); +extern void ath_bar_response(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap, int status); +extern void ath_addba_response_timeout(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap); + #endif diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index 348a1499..bec7064 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -86,17 +86,357 @@ __FBSDID("$FreeBSD$"); #include <dev/ath/ath_tx99/ath_tx99.h> #endif +#include <dev/ath/if_ath_tx.h> /* XXX for some support functions */ #include <dev/ath/if_ath_tx_ht.h> +#include <dev/ath/if_athrate.h> +#include <dev/ath/if_ath_debug.h> + +/* + * XXX net80211? + */ +#define IEEE80211_AMPDU_SUBFRAME_DEFAULT 32 + +#define ATH_AGGR_DELIM_SZ 4 /* delimiter size */ +#define ATH_AGGR_MINPLEN 256 /* in bytes, minimum packet length */ +#define ATH_AGGR_ENCRYPTDELIM 10 /* number of delimiters for encryption padding */ + +/* + * returns delimiter padding required given the packet length + */ +#define ATH_AGGR_GET_NDELIM(_len) \ + (((((_len) + ATH_AGGR_DELIM_SZ) < ATH_AGGR_MINPLEN) ? 
\ + (ATH_AGGR_MINPLEN - (_len) - ATH_AGGR_DELIM_SZ) : 0) >> 2) + +#define PADBYTES(_len) ((4 - ((_len) % 4)) % 4) + +int ath_max_4ms_framelen[4][32] = { + [MCS_HT20] = { + 3212, 6432, 9648, 12864, 19300, 25736, 28952, 32172, + 6424, 12852, 19280, 25708, 38568, 51424, 57852, 64280, + 9628, 19260, 28896, 38528, 57792, 65532, 65532, 65532, + 12828, 25656, 38488, 51320, 65532, 65532, 65532, 65532, + }, + [MCS_HT20_SGI] = { + 3572, 7144, 10720, 14296, 21444, 28596, 32172, 35744, + 7140, 14284, 21428, 28568, 42856, 57144, 64288, 65532, + 10700, 21408, 32112, 42816, 64228, 65532, 65532, 65532, + 14256, 28516, 42780, 57040, 65532, 65532, 65532, 65532, + }, + [MCS_HT40] = { + 6680, 13360, 20044, 26724, 40092, 53456, 60140, 65532, + 13348, 26700, 40052, 53400, 65532, 65532, 65532, 65532, + 20004, 40008, 60016, 65532, 65532, 65532, 65532, 65532, + 26644, 53292, 65532, 65532, 65532, 65532, 65532, 65532, + }, + [MCS_HT40_SGI] = { + 7420, 14844, 22272, 29696, 44544, 59396, 65532, 65532, + 14832, 29668, 44504, 59340, 65532, 65532, 65532, 65532, + 22232, 44464, 65532, 65532, 65532, 65532, 65532, 65532, + 29616, 59232, 65532, 65532, 65532, 65532, 65532, 65532, + } +}; + +/* + * XXX should be in net80211 + */ +static int ieee80211_mpdudensity_map[] = { + 0, /* IEEE80211_HTCAP_MPDUDENSITY_NA */ + 25, /* IEEE80211_HTCAP_MPDUDENSITY_025 */ + 50, /* IEEE80211_HTCAP_MPDUDENSITY_05 */ + 100, /* IEEE80211_HTCAP_MPDUDENSITY_1 */ + 200, /* IEEE80211_HTCAP_MPDUDENSITY_2 */ + 400, /* IEEE80211_HTCAP_MPDUDENSITY_4 */ + 800, /* IEEE80211_HTCAP_MPDUDENSITY_8 */ + 1600, /* IEEE80211_HTCAP_MPDUDENSITY_16 */ +}; + +/* + * XXX should be in the HAL/net80211 ? 
+ */ +#define BITS_PER_BYTE 8 +#define OFDM_PLCP_BITS 22 +#define HT_RC_2_MCS(_rc) ((_rc) & 0x7f) +#define HT_RC_2_STREAMS(_rc) ((((_rc) & 0x78) >> 3) + 1) +#define L_STF 8 +#define L_LTF 8 +#define L_SIG 4 +#define HT_SIG 8 +#define HT_STF 4 +#define HT_LTF(_ns) (4 * (_ns)) +#define SYMBOL_TIME(_ns) ((_ns) << 2) // ns * 4 us +#define SYMBOL_TIME_HALFGI(_ns) (((_ns) * 18 + 4) / 5) // ns * 3.6 us +#define NUM_SYMBOLS_PER_USEC(_usec) (_usec >> 2) +#define NUM_SYMBOLS_PER_USEC_HALFGI(_usec) (((_usec*5)-4)/18) +#define IS_HT_RATE(_rate) ((_rate) & 0x80) + +const uint32_t bits_per_symbol[][2] = { + /* 20MHz 40MHz */ + { 26, 54 }, // 0: BPSK + { 52, 108 }, // 1: QPSK 1/2 + { 78, 162 }, // 2: QPSK 3/4 + { 104, 216 }, // 3: 16-QAM 1/2 + { 156, 324 }, // 4: 16-QAM 3/4 + { 208, 432 }, // 5: 64-QAM 2/3 + { 234, 486 }, // 6: 64-QAM 3/4 + { 260, 540 }, // 7: 64-QAM 5/6 + { 52, 108 }, // 8: BPSK + { 104, 216 }, // 9: QPSK 1/2 + { 156, 324 }, // 10: QPSK 3/4 + { 208, 432 }, // 11: 16-QAM 1/2 + { 312, 648 }, // 12: 16-QAM 3/4 + { 416, 864 }, // 13: 64-QAM 2/3 + { 468, 972 }, // 14: 64-QAM 3/4 + { 520, 1080 }, // 15: 64-QAM 5/6 + { 78, 162 }, // 16: BPSK + { 156, 324 }, // 17: QPSK 1/2 + { 234, 486 }, // 18: QPSK 3/4 + { 312, 648 }, // 19: 16-QAM 1/2 + { 468, 972 }, // 20: 16-QAM 3/4 + { 624, 1296 }, // 21: 64-QAM 2/3 + { 702, 1458 }, // 22: 64-QAM 3/4 + { 780, 1620 }, // 23: 64-QAM 5/6 + { 104, 216 }, // 24: BPSK + { 208, 432 }, // 25: QPSK 1/2 + { 312, 648 }, // 26: QPSK 3/4 + { 416, 864 }, // 27: 16-QAM 1/2 + { 624, 1296 }, // 28: 16-QAM 3/4 + { 832, 1728 }, // 29: 64-QAM 2/3 + { 936, 1944 }, // 30: 64-QAM 3/4 + { 1040, 2160 }, // 31: 64-QAM 5/6 +}; + +/* + * Fill in the rate array information based on the current + * node configuration and the choices made by the rate + * selection code and ath_buf setup code. + * + * Later on, this may end up also being made by the + * rate control code, but for now it can live here. 
+ * + * This needs to be called just before the packet is + * queued to the software queue or hardware queue, + * so all of the needed fields in bf_state are setup. + */ +void +ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) +{ + struct ieee80211_node *ni = bf->bf_node; + struct ieee80211com *ic = ni->ni_ic; + const HAL_RATE_TABLE *rt = sc->sc_currates; + struct ath_rc_series *rc = bf->bf_state.bfs_rc; + uint8_t rate; + int i; + + for (i = 0; i < ATH_RC_NUM; i++) { + rc[i].flags = 0; + if (rc[i].tries == 0) + continue; + + rate = rt->info[rc[i].rix].rateCode; + + /* + * XXX only do this for legacy rates? + */ + if (bf->bf_state.bfs_shpream) + rate |= rt->info[rc[i].rix].shortPreamble; + + /* + * Save this, used by the TX and completion code + */ + rc[i].ratecode = rate; + + if (bf->bf_state.bfs_flags & + (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) + rc[i].flags |= ATH_RC_RTSCTS_FLAG; + + /* Only enable shortgi, 2040, dual-stream if HT is set */ + if (IS_HT_RATE(rate)) { + rc[i].flags |= ATH_RC_HT_FLAG; + + if (ni->ni_chw == 40) + rc[i].flags |= ATH_RC_CW40_FLAG; + + if (ni->ni_chw == 40 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) + rc[i].flags |= ATH_RC_SGI_FLAG; + + if (ni->ni_chw == 20 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) + rc[i].flags |= ATH_RC_SGI_FLAG; + + /* XXX dual stream? and 3-stream? */ + } + + /* + * Calculate the maximum 4ms frame length based + * on the MCS rate, SGI and channel width flags. 
+ */ + if ((rc[i].flags & ATH_RC_HT_FLAG) && + (HT_RC_2_MCS(rate) < 32)) { + int j; + if (rc[i].flags & ATH_RC_CW40_FLAG) { + if (rc[i].flags & ATH_RC_SGI_FLAG) + j = MCS_HT40_SGI; + else + j = MCS_HT40; + } else { + if (rc[i].flags & ATH_RC_SGI_FLAG) + j = MCS_HT20_SGI; + else + j = MCS_HT20; + } + rc[i].max4msframelen = + ath_max_4ms_framelen[j][HT_RC_2_MCS(rate)]; + } else + rc[i].max4msframelen = 0; + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: i=%d, rate=0x%x, flags=0x%x, max4ms=%d\n", + __func__, i, rate, rc[i].flags, rc[i].max4msframelen); + } +} + +/* + * Return the number of delimiters to be added to + * meet the minimum required mpdudensity. + * + * Caller should make sure that the rate is HT. + * + * TODO: is this delimiter calculation supposed to be the + * total frame length, the hdr length, the data length (including + * delimiters, padding, CRC, etc) or ? + * + * TODO: this should ensure that the rate control information + * HAS been setup for the first rate. + * + * TODO: ensure this is only called for MCS rates. + * + * TODO: enforce MCS < 31 + */ +static int +ath_compute_num_delims(struct ath_softc *sc, struct ath_buf *first_bf, + uint16_t pktlen) +{ + const HAL_RATE_TABLE *rt = sc->sc_currates; + struct ieee80211_node *ni = first_bf->bf_node; + struct ieee80211vap *vap = ni->ni_vap; + int ndelim, mindelim = 0; + int mpdudensity; /* in 1/100'th of a microsecond */ + uint8_t rc, rix, flags; + int width, half_gi; + uint32_t nsymbits, nsymbols; + uint16_t minlen; + + /* + * vap->iv_ampdu_density is a value, rather than the actual + * density. + */ + if (vap->iv_ampdu_density > IEEE80211_HTCAP_MPDUDENSITY_16) + mpdudensity = 1600; /* maximum density */ + else + mpdudensity = ieee80211_mpdudensity_map[vap->iv_ampdu_density]; + + /* Select standard number of delimiters based on frame length */ + ndelim = ATH_AGGR_GET_NDELIM(pktlen); + + /* + * If encryption is enabled, add extra delimiters to let the + * crypto hardware catch up. 
This could be tuned per-MAC and + * per-rate, but for now we'll simply assume encryption is + * always enabled. + */ + ndelim += ATH_AGGR_ENCRYPTDELIM; + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: pktlen=%d, ndelim=%d, mpdudensity=%d\n", + __func__, pktlen, ndelim, mpdudensity); + + /* + * If the MPDU density is 0, we can return here. + * Otherwise, we need to convert the desired mpdudensity + * into a byte length, based on the rate in the subframe. + */ + if (mpdudensity == 0) + return ndelim; + + /* + * Convert desired mpdu density from microseconds to bytes based + * on highest rate in rate series (i.e. first rate) to determine + * required minimum length for subframe. Take into account + * whether high rate is 20 or 40MHz and half or full GI. + */ + rix = first_bf->bf_state.bfs_rc[0].rix; + rc = rt->info[rix].rateCode; + flags = first_bf->bf_state.bfs_rc[0].flags; + width = !! (flags & ATH_RC_CW40_FLAG); + half_gi = !! (flags & ATH_RC_SGI_FLAG); + + /* + * mpdudensity is in 1/100th of a usec, so divide by 100 + */ + if (half_gi) + nsymbols = NUM_SYMBOLS_PER_USEC_HALFGI(mpdudensity); + else + nsymbols = NUM_SYMBOLS_PER_USEC(mpdudensity); + nsymbols /= 100; + + if (nsymbols == 0) + nsymbols = 1; + + nsymbits = bits_per_symbol[HT_RC_2_MCS(rc)][width]; + minlen = (nsymbols * nsymbits) / BITS_PER_BYTE; + + /* + * Min length is the minimum frame length for the + * required MPDU density. + */ + if (pktlen < minlen) { + mindelim = (minlen - pktlen) / ATH_AGGR_DELIM_SZ; + ndelim = MAX(mindelim, ndelim); + } + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, + "%s: pktlen=%d, minlen=%d, rix=%x, rc=%x, width=%d, hgi=%d, ndelim=%d\n", + __func__, pktlen, minlen, rix, rc, width, half_gi, ndelim); + + return ndelim; +} + +/* + * Fetch the aggregation limit. + * + * It's the lowest of the four rate series 4ms frame length. 
+ */ +static int +ath_get_aggr_limit(struct ath_softc *sc, struct ath_buf *bf) +{ + int amin = 65530; + int i; + + for (i = 0; i < 4; i++) { + if (bf->bf_state.bfs_rc[i].tries == 0) + continue; + amin = MIN(amin, bf->bf_state.bfs_rc[i].max4msframelen); + } + + DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: max frame len= %d\n", + __func__, amin); + + return amin; +} /* * Setup a 11n rate series structure * * This should be called for both legacy and MCS rates. + * + * It, along with ath_buf_set_rate, must be called -after- a burst + * or aggregate is setup. */ static void ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, - HAL_11N_RATE_SERIES *series, unsigned int pktlen, uint8_t *rix, - uint8_t *try, int flags) + struct ath_buf *bf, HAL_11N_RATE_SERIES *series) { #define HT_RC_2_STREAMS(_rc) ((((_rc) & 0x78) >> 3) + 1) struct ieee80211com *ic = ni->ni_ic; @@ -104,18 +444,34 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, HAL_BOOL shortPreamble = AH_FALSE; const HAL_RATE_TABLE *rt = sc->sc_currates; int i; + int pktlen; + int flags = bf->bf_state.bfs_flags; + struct ath_rc_series *rc = bf->bf_state.bfs_rc; if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) && (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE)) shortPreamble = AH_TRUE; + /* + * If this is the first frame in an aggregate series, + * use the aggregate length. + */ + if (bf->bf_state.bfs_aggr) + pktlen = bf->bf_state.bfs_al; + else + pktlen = bf->bf_state.bfs_pktlen; + + /* + * XXX TODO: modify this routine to use the bfs_rc[x].flags + * XXX fields. 
+ */ memset(series, 0, sizeof(HAL_11N_RATE_SERIES) * 4); for (i = 0; i < 4; i++) { /* Only set flags for actual TX attempts */ - if (try[i] == 0) + if (rc[i].tries == 0) continue; - series[i].Tries = try[i]; + series[i].Tries = rc[i].tries; /* * XXX this isn't strictly correct - sc_txchainmask @@ -154,7 +510,7 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) series[i].RateFlags |= HAL_RATESERIES_HALFGI; - series[i].Rate = rt->info[rix[i]].rateCode; + series[i].Rate = rt->info[rc[i].rix].rateCode; /* PktDuration doesn't include slot, ACK, RTS, etc timing - it's just the packet duration */ if (series[i].Rate & IEEE80211_RATE_MCS) { @@ -166,9 +522,10 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, , series[i].RateFlags & HAL_RATESERIES_HALFGI); } else { if (shortPreamble) - series[i].Rate |= rt->info[rix[i]].shortPreamble; + series[i].Rate |= + rt->info[rc[i].rix].shortPreamble; series[i].PktDuration = ath_hal_computetxtime(ah, - rt, pktlen, rix[i], shortPreamble); + rt, pktlen, rc[i].rix, shortPreamble); } } #undef HT_RC_2_STREAMS @@ -200,25 +557,28 @@ ath_rateseries_print(HAL_11N_RATE_SERIES *series) */ void -ath_buf_set_rate(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf *bf, - int pktlen, int flags, uint8_t ctsrate, int is_pspoll, uint8_t *rix, uint8_t *try) +ath_buf_set_rate(struct ath_softc *sc, struct ieee80211_node *ni, + struct ath_buf *bf) { HAL_11N_RATE_SERIES series[4]; struct ath_desc *ds = bf->bf_desc; struct ath_desc *lastds = NULL; struct ath_hal *ah = sc->sc_ah; + int is_pspoll = (bf->bf_state.bfs_atype == HAL_PKT_TYPE_PSPOLL); + int ctsrate = bf->bf_state.bfs_ctsrate; + int flags = bf->bf_state.bfs_flags; /* Setup rate scenario */ memset(&series, 0, sizeof(series)); - ath_rateseries_setup(sc, ni, series, pktlen, rix, try, flags); + ath_rateseries_setup(sc, ni, bf, series); /* Enforce AR5416 aggregate limit - can't do RTS w/ an agg frame > 8k */ 
/* Enforce RTS and CTS are mutually exclusive */ /* Get a pointer to the last tx descriptor in the list */ - lastds = &bf->bf_desc[bf->bf_nseg - 1]; + lastds = bf->bf_lastds; #if 0 printf("pktlen: %d; flags 0x%x\n", pktlen, flags); @@ -238,6 +598,238 @@ ath_buf_set_rate(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_buf ath_hal_setuplasttxdesc(ah, lastds, ds); /* Set burst duration */ - /* This should only be done if aggregate protection is enabled */ + /* + * This is only required when doing 11n burst, not aggregation + * ie, if there's a second frame in a RIFS or A-MPDU burst + * w/ >1 A-MPDU frame bursting back to back. + * Normal A-MPDU doesn't do bursting -between- aggregates. + * + * .. and it's highly likely this won't ever be implemented + */ //ath_hal_set11nburstduration(ah, ds, 8192); } + +/* + * Form an aggregate packet list. + * + * This function enforces the aggregate restrictions/requirements. + * + * These are: + * + * + The aggregate size maximum (64k for AR9160 and later, 8K for + * AR5416 when doing RTS frame protection.) + * + Maximum number of sub-frames for an aggregate + * + The aggregate delimiter size, giving MACs time to do whatever is + * needed before each frame + * + Enforce the BAW limit + * + * Each descriptor queued should have the DMA setup. + * The rate series, descriptor setup, linking, etc is all done + * externally. This routine simply chains them together. + * ath_tx_setds_11n() will take care of configuring the per- + * descriptor setup, and ath_buf_set_rate() will configure the + * rate control. + * + * Note that the TID lock is only grabbed when dequeuing packets from + * the TID queue. If some code in another thread adds to the head of this + * list, very strange behaviour will occur. Since retransmission is the + * only reason this will occur, and this routine is designed to be called + * from within the scheduler task, it won't ever clash with the completion + * task. 
+ * + * So if you want to call this from an upper layer context (eg, to direct- + * dispatch aggregate frames to the hardware), please keep this in mind. + */ +ATH_AGGR_STATUS +ath_tx_form_aggr(struct ath_softc *sc, struct ath_node *an, struct ath_tid *tid, + ath_bufhead *bf_q) +{ + //struct ieee80211_node *ni = &an->an_node; + struct ath_buf *bf, *bf_first = NULL, *bf_prev = NULL; + int nframes = 0; + uint16_t aggr_limit = 0, al = 0, bpad = 0, al_delta, h_baw; + struct ieee80211_tx_ampdu *tap; + int status = ATH_AGGR_DONE; + int prev_frames = 0; /* XXX for AR5416 burst, not done here */ + int prev_al = 0; /* XXX also for AR5416 burst */ + + ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]); + + tap = ath_tx_get_tx_tid(an, tid->tid); + if (tap == NULL) { + status = ATH_AGGR_ERROR; + goto finish; + } + + h_baw = tap->txa_wnd / 2; + + for (;;) { + bf = TAILQ_FIRST(&tid->axq_q); + if (bf_first == NULL) + bf_first = bf; + if (bf == NULL) { + status = ATH_AGGR_DONE; + break; + } else { + /* + * It's the first frame; + * set the aggregation limit based on the + * rate control decision that has been made. + */ + aggr_limit = ath_get_aggr_limit(sc, bf_first); + } + + /* Set this early just so things don't get confused */ + bf->bf_next = NULL; + + /* + * Don't unlock the tid lock until we're sure we are going + * to queue this frame. + */ + + /* + * If the frame doesn't have a sequence number that we're + * tracking in the BAW (eg NULL QOS data frame), we can't + * aggregate it. Stop the aggregation process; the sender + * can then TX what's in the list thus far and then + * TX the frame individually. + */ + if (! bf->bf_state.bfs_dobaw) { + status = ATH_AGGR_NONAGGR; + break; + } + + /* + * If any of the rates are non-HT, this packet + * can't be aggregated. + * XXX TODO: add a bf_state flag which gets marked + * if any active rate is non-HT. + */ + + /* + * If the packet has a sequence number, do not + * step outside of the block-ack window. + */ + if (! 
BAW_WITHIN(tap->txa_start, tap->txa_wnd, + SEQNO(bf->bf_state.bfs_seqno))) { + status = ATH_AGGR_BAW_CLOSED; + break; + } + + /* + * XXX TODO: AR5416 has an 8K aggregation size limit + * when RTS is enabled, and RTS is required for dual-stream + * rates. + * + * For now, limit all aggregates for the AR5416 to be 8K. + */ + + /* + * do not exceed aggregation limit + */ + al_delta = ATH_AGGR_DELIM_SZ + bf->bf_state.bfs_pktlen; + if (nframes && + (aggr_limit < (al + bpad + al_delta + prev_al))) { + status = ATH_AGGR_LIMITED; + break; + } + + /* + * Do not exceed subframe limit. + */ + if ((nframes + prev_frames) >= MIN((h_baw), + IEEE80211_AMPDU_SUBFRAME_DEFAULT)) { + status = ATH_AGGR_LIMITED; + break; + } + + /* + * this packet is part of an aggregate. + */ + ATH_TXQ_REMOVE(tid, bf, bf_list); + + /* The TID lock is required for the BAW update */ + ath_tx_addto_baw(sc, an, tid, bf); + bf->bf_state.bfs_addedbaw = 1; + + /* + * XXX TODO: If any frame in the aggregate requires RTS/CTS, + * set the first frame. + */ + + /* + * XXX enforce ACK for aggregate frames (this needs to be + * XXX handled more gracefully?) + */ + if (bf->bf_state.bfs_flags & HAL_TXDESC_NOACK) { + device_printf(sc->sc_dev, + "%s: HAL_TXDESC_NOACK set for an aggregate frame?\n", + __func__); + bf->bf_state.bfs_flags &= (~HAL_TXDESC_NOACK); + } + + /* + * Add the now owned buffer (which isn't + * on the software TXQ any longer) to our + * aggregate frame list. + */ + TAILQ_INSERT_TAIL(bf_q, bf, bf_list); + nframes ++; + + /* Completion handler */ + bf->bf_comp = ath_tx_aggr_comp; + + /* + * add padding for previous frame to aggregation length + */ + al += bpad + al_delta; + + /* + * Calculate delimiters needed for the current frame + */ + bf->bf_state.bfs_ndelim = + ath_compute_num_delims(sc, bf_first, + bf->bf_state.bfs_pktlen); + + /* + * Calculate the padding needed from this set of delimiters, + * used when calculating if the next frame will fit in + * the aggregate. 
+ */ + bpad = PADBYTES(al_delta) + (bf->bf_state.bfs_ndelim << 2); + + /* + * Chain the buffers together + */ + if (bf_prev) + bf_prev->bf_next = bf; + bf_prev = bf; + + /* + * XXX TODO: if any sub-frames have RTS/CTS enabled; + * enable it for the entire aggregate. + */ + +#if 0 + /* + * terminate aggregation on a small packet boundary + */ + if (bf->bf_state.bfs_pktlen < ATH_AGGR_MINPLEN) { + status = ATH_AGGR_SHORTPKT; + break; + } +#endif + + } + +finish: + /* + * Just in case the list was empty when we tried to + * dequeue a packet .. + */ + if (bf_first) { + bf_first->bf_state.bfs_al = al; + bf_first->bf_state.bfs_nframes = nframes; + } + return status; +} diff --git a/sys/dev/ath/if_ath_tx_ht.h b/sys/dev/ath/if_ath_tx_ht.h index cf16c46..543c56b 100644 --- a/sys/dev/ath/if_ath_tx_ht.h +++ b/sys/dev/ath/if_ath_tx_ht.h @@ -31,9 +31,32 @@ #ifndef __IF_ATH_TX_HT_H__ #define __IF_ATH_TX_HT_H__ +enum { + MCS_HT20, + MCS_HT20_SGI, + MCS_HT40, + MCS_HT40_SGI, +}; + +typedef enum { + ATH_AGGR_DONE, + ATH_AGGR_BAW_CLOSED, + ATH_AGGR_LIMITED, + ATH_AGGR_SHORTPKT, + ATH_AGGR_8K_LIMITED, + ATH_AGGR_ERROR, + ATH_AGGR_NONAGGR, +} ATH_AGGR_STATUS; + +extern int ath_max_4ms_framelen[4][32]; + +extern void ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf); + extern void ath_buf_set_rate(struct ath_softc *sc, - struct ieee80211_node *ni, struct ath_buf *bf, - int pktlen, int flags, uint8_t ctsrate, int is_pspoll, - uint8_t *rix, uint8_t *try); + struct ieee80211_node *ni, struct ath_buf *bf); + +extern ATH_AGGR_STATUS + ath_tx_form_aggr(struct ath_softc *sc, struct ath_node *an, + struct ath_tid *tid, ath_bufhead *bf_q); #endif diff --git a/sys/dev/ath/if_athrate.h b/sys/dev/ath/if_athrate.h index ca7a8b0..10f6040 100644 --- a/sys/dev/ath/if_athrate.h +++ b/sys/dev/ath/if_athrate.h @@ -120,7 +120,7 @@ void ath_rate_newassoc(struct ath_softc *, struct ath_node *, * Return the four TX rate index and try counts for the current data packet. 
*/ void ath_rate_getxtxrates(struct ath_softc *sc, struct ath_node *an, - uint8_t rix0, uint8_t *rix, uint8_t *try); + uint8_t rix0, struct ath_rc_series *rc); /* * Return the transmit info for a data packet. If multi-rate state @@ -142,8 +142,12 @@ void ath_rate_setupxtxdesc(struct ath_softc *, struct ath_node *, * supplied transmit descriptor. The routine is invoked both * for packets that were successfully sent and for those that * failed (consult the descriptor for details). + * + * For A-MPDU frames, nframes and nbad indicate how many frames + * were in the aggregate, and how many failed. */ struct ath_buf; void ath_rate_tx_complete(struct ath_softc *, struct ath_node *, - const struct ath_buf *); + const struct ath_rc_series *, const struct ath_tx_status *, + int pktlen, int nframes, int nbad); #endif /* _ATH_RATECTRL_H_ */ |