summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoradrian <adrian@FreeBSD.org>2016-06-19 07:31:02 +0000
committeradrian <adrian@FreeBSD.org>2016-06-19 07:31:02 +0000
commit09a0af3095401ecbe16901b6657f4d8d900ac47c (patch)
tree0b768eec1cbe9bb8dae8a1f2e106f3d990493a6d
parentfa16c812f5b570ef8994dae9eace7bb20d997f87 (diff)
downloadFreeBSD-src-09a0af3095401ecbe16901b6657f4d8d900ac47c.zip
FreeBSD-src-09a0af3095401ecbe16901b6657f4d8d900ac47c.tar.gz
[net80211] remove node scan lock / generation number + fix few LORs
Drop scan generation number and node table scan lock - the only place where ni_scangen is checked is in ieee80211_timeout_stations() (and it is used to prevent duplicate checking of the same node); node scan lock protects only this variable + node table scan generation number. This will fix (at least) next LOR (hostap mode): lock order reversal: 1st 0xc175f84c urtwm0_scan_loc (urtwm0_scan_loc) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2019 2nd 0xc175e018 urtwm0_com_lock (urtwm0_com_lock) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2693 stack backtrace: #0 0xa070d1c5 at witness_debugger+0x75 #1 0xa070d0f6 at witness_checkorder+0xd46 #2 0xa0694cce at __mtx_lock_flags+0x9e #3 0xb03ad9ef at ieee80211_node_leave+0x12f #4 0xb03afd13 at ieee80211_timeout_stations+0x483 #5 0xb03aa1c2 at ieee80211_node_timeout+0x42 #6 0xa06c6fa1 at softclock_call_cc+0x1e1 #7 0xa06c7518 at softclock+0xc8 #8 0xa06789ae at intr_event_execute_handlers+0x8e #9 0xa0678fa0 at ithread_loop+0x90 #10 0xa0675fbe at fork_exit+0x7e #11 0xa08af910 at fork_trampoline+0x8 In addition to the above: * switch to ieee80211_iterate_nodes(); * do not assert that node table lock is held, while calling node_age(); that's not really needed (there are no resources, which can be protected by this lock) + this fixes LOR/deadlock between ieee80211_timeout_stations() and ieee80211_set_tim() (easy to reproduce in HOSTAP mode while sending something to an STA with enabled power management). Tested: * (avos) urtwn0, hostap mode * (adrian) AR9380, STA mode * (adrian) AR9380, AR9331, AR9580, hostap mode Notes: * This changes the net80211 internals, so you have to recompile all of it and the wifi drivers. Submitted by: avos Approved by: re (delphij) Differential Revision: https://reviews.freebsd.org/D6833
-rw-r--r--sys/net80211/ieee80211_ddb.c7
-rw-r--r--sys/net80211/ieee80211_freebsd.h22
-rw-r--r--sys/net80211/ieee80211_node.c269
-rw-r--r--sys/net80211/ieee80211_node.h3
-rw-r--r--sys/net80211/ieee80211_superg.c6
5 files changed, 123 insertions, 184 deletions
diff --git a/sys/net80211/ieee80211_ddb.c b/sys/net80211/ieee80211_ddb.c
index f758875..94fcf93 100644
--- a/sys/net80211/ieee80211_ddb.c
+++ b/sys/net80211/ieee80211_ddb.c
@@ -233,9 +233,8 @@ _db_show_sta(const struct ieee80211_node *ni)
db_printf("\tvap %p wdsvap %p ic %p table %p\n",
ni->ni_vap, ni->ni_wdsvap, ni->ni_ic, ni->ni_table);
db_printf("\tflags=%b\n", ni->ni_flags, IEEE80211_NODE_BITS);
- db_printf("\tscangen %u authmode %u ath_flags 0x%x ath_defkeyix %u\n",
- ni->ni_scangen, ni->ni_authmode,
- ni->ni_ath_flags, ni->ni_ath_defkeyix);
+ db_printf("\tauthmode %u ath_flags 0x%x ath_defkeyix %u\n",
+ ni->ni_authmode, ni->ni_ath_flags, ni->ni_ath_defkeyix);
db_printf("\tassocid 0x%x txpower %u vlan %u\n",
ni->ni_associd, ni->ni_txpower, ni->ni_vlan);
db_printf("\tjointime %d (%lu secs) challenge %p\n",
@@ -688,8 +687,6 @@ _db_show_node_table(const char *tag, const struct ieee80211_node_table *nt)
db_printf("%s%s@%p:\n", tag, nt->nt_name, nt);
db_printf("%s nodelock %p", tag, &nt->nt_nodelock);
db_printf(" inact_init %d", nt->nt_inact_init);
- db_printf(" scanlock %p", &nt->nt_scanlock);
- db_printf(" scangen %u\n", nt->nt_scangen);
db_printf("%s keyixmax %d keyixmap %p\n",
tag, nt->nt_keyixmax, nt->nt_keyixmap);
for (i = 0; i < nt->nt_keyixmax; i++) {
diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h
index 26fba76..2cb3b43 100644
--- a/sys/net80211/ieee80211_freebsd.h
+++ b/sys/net80211/ieee80211_freebsd.h
@@ -107,28 +107,6 @@ typedef struct {
mtx_assert(IEEE80211_NODE_LOCK_OBJ(_nt), MA_OWNED)
/*
- * Node table iteration locking definitions; this protects the
- * scan generation # used to iterate over the station table
- * while grabbing+releasing the node lock.
- */
-typedef struct {
- char name[16]; /* e.g. "ath0_scan_lock" */
- struct mtx mtx;
-} ieee80211_scan_lock_t;
-#define IEEE80211_NODE_ITERATE_LOCK_INIT(_nt, _name) do { \
- ieee80211_scan_lock_t *sl = &(_nt)->nt_scanlock; \
- snprintf(sl->name, sizeof(sl->name), "%s_scan_lock", _name); \
- mtx_init(&sl->mtx, sl->name, NULL, MTX_DEF); \
-} while (0)
-#define IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt) (&(_nt)->nt_scanlock.mtx)
-#define IEEE80211_NODE_ITERATE_LOCK_DESTROY(_nt) \
- mtx_destroy(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-#define IEEE80211_NODE_ITERATE_LOCK(_nt) \
- mtx_lock(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-#define IEEE80211_NODE_ITERATE_UNLOCK(_nt) \
- mtx_unlock(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-
-/*
* Power-save queue definitions.
*/
typedef struct mtx ieee80211_psq_lock_t;
diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c
index fd134e1..c5b5cc4 100644
--- a/sys/net80211/ieee80211_node.c
+++ b/sys/net80211/ieee80211_node.c
@@ -1099,8 +1099,6 @@ node_age(struct ieee80211_node *ni)
{
struct ieee80211vap *vap = ni->ni_vap;
- IEEE80211_NODE_LOCK_ASSERT(&vap->iv_ic->ic_sta);
-
/*
* Age frames on the power save queue.
*/
@@ -1922,10 +1920,8 @@ ieee80211_node_table_init(struct ieee80211com *ic,
nt->nt_ic = ic;
IEEE80211_NODE_LOCK_INIT(nt, ic->ic_name);
- IEEE80211_NODE_ITERATE_LOCK_INIT(nt, ic->ic_name);
TAILQ_INIT(&nt->nt_node);
nt->nt_name = name;
- nt->nt_scangen = 1;
nt->nt_inact_init = inact;
nt->nt_keyixmax = keyixmax;
if (nt->nt_keyixmax > 0) {
@@ -1994,159 +1990,137 @@ ieee80211_node_table_cleanup(struct ieee80211_node_table *nt)
IEEE80211_FREE(nt->nt_keyixmap, M_80211_NODE);
nt->nt_keyixmap = NULL;
}
- IEEE80211_NODE_ITERATE_LOCK_DESTROY(nt);
IEEE80211_NODE_LOCK_DESTROY(nt);
}
-/*
- * Timeout inactive stations and do related housekeeping.
- * Note that we cannot hold the node lock while sending a
- * frame as this would lead to a LOR. Instead we use a
- * generation number to mark nodes that we've scanned and
- * drop the lock and restart a scan if we have to time out
- * a node. Since we are single-threaded by virtue of
- * controlling the inactivity timer we can be sure this will
- * process each node only once.
- */
static void
-ieee80211_timeout_stations(struct ieee80211com *ic)
+timeout_stations(void *arg __unused, struct ieee80211_node *ni)
{
- struct ieee80211_node_table *nt = &ic->ic_sta;
- struct ieee80211vap *vap;
- struct ieee80211_node *ni;
- int gen = 0;
+ struct ieee80211com *ic = ni->ni_ic;
+ struct ieee80211vap *vap = ni->ni_vap;
- IEEE80211_NODE_ITERATE_LOCK(nt);
- gen = ++nt->nt_scangen;
-restart:
- IEEE80211_NODE_LOCK(nt);
- TAILQ_FOREACH(ni, &nt->nt_node, ni_list) {
- if (ni->ni_scangen == gen) /* previously handled */
- continue;
- ni->ni_scangen = gen;
- /*
- * Ignore entries for which have yet to receive an
- * authentication frame. These are transient and
- * will be reclaimed when the last reference to them
- * goes away (when frame xmits complete).
- */
- vap = ni->ni_vap;
- /*
- * Only process stations when in RUN state. This
- * insures, for example, that we don't timeout an
- * inactive station during CAC. Note that CSA state
- * is actually handled in ieee80211_node_timeout as
- * it applies to more than timeout processing.
- */
- if (vap->iv_state != IEEE80211_S_RUN)
- continue;
- /* XXX can vap be NULL? */
- if ((vap->iv_opmode == IEEE80211_M_HOSTAP ||
- vap->iv_opmode == IEEE80211_M_STA) &&
- (ni->ni_flags & IEEE80211_NODE_AREF) == 0)
- continue;
+ /*
+ * Only process stations when in RUN state. This
+ * insures, for example, that we don't timeout an
+ * inactive station during CAC. Note that CSA state
+ * is actually handled in ieee80211_node_timeout as
+ * it applies to more than timeout processing.
+ */
+ if (vap->iv_state != IEEE80211_S_RUN)
+ return;
+ /*
+ * Ignore entries for which have yet to receive an
+ * authentication frame. These are transient and
+ * will be reclaimed when the last reference to them
+ * goes away (when frame xmits complete).
+ */
+ if ((vap->iv_opmode == IEEE80211_M_HOSTAP ||
+ vap->iv_opmode == IEEE80211_M_STA) &&
+ (ni->ni_flags & IEEE80211_NODE_AREF) == 0)
+ return;
+ /*
+ * Free fragment if not needed anymore
+ * (last fragment older than 1s).
+ * XXX doesn't belong here, move to node_age
+ */
+ if (ni->ni_rxfrag[0] != NULL &&
+ ticks > ni->ni_rxfragstamp + hz) {
+ m_freem(ni->ni_rxfrag[0]);
+ ni->ni_rxfrag[0] = NULL;
+ }
+ if (ni->ni_inact > 0) {
+ ni->ni_inact--;
+ IEEE80211_NOTE(vap, IEEE80211_MSG_INACT, ni,
+ "%s: inact %u inact_reload %u nrates %u",
+ __func__, ni->ni_inact, ni->ni_inact_reload,
+ ni->ni_rates.rs_nrates);
+ }
+ /*
+ * Special case ourself; we may be idle for extended periods
+ * of time and regardless reclaiming our state is wrong.
+ * XXX run ic_node_age
+ */
+ /* XXX before inact decrement? */
+ if (ni == vap->iv_bss)
+ return;
+ if (ni->ni_associd != 0 ||
+ (vap->iv_opmode == IEEE80211_M_IBSS ||
+ vap->iv_opmode == IEEE80211_M_AHDEMO)) {
/*
- * Free fragment if not needed anymore
- * (last fragment older than 1s).
- * XXX doesn't belong here, move to node_age
+ * Age/drain resources held by the station.
*/
- if (ni->ni_rxfrag[0] != NULL &&
- ticks > ni->ni_rxfragstamp + hz) {
- m_freem(ni->ni_rxfrag[0]);
- ni->ni_rxfrag[0] = NULL;
- }
- if (ni->ni_inact > 0) {
- ni->ni_inact--;
- IEEE80211_NOTE(vap, IEEE80211_MSG_INACT, ni,
- "%s: inact %u inact_reload %u nrates %u",
- __func__, ni->ni_inact, ni->ni_inact_reload,
- ni->ni_rates.rs_nrates);
- }
+ ic->ic_node_age(ni);
/*
- * Special case ourself; we may be idle for extended periods
- * of time and regardless reclaiming our state is wrong.
- * XXX run ic_node_age
+ * Probe the station before time it out. We
+ * send a null data frame which may not be
+ * universally supported by drivers (need it
+ * for ps-poll support so it should be...).
+ *
+ * XXX don't probe the station unless we've
+ * received a frame from them (and have
+ * some idea of the rates they are capable
+ * of); this will get fixed more properly
+ * soon with better handling of the rate set.
*/
- if (ni == vap->iv_bss)
- continue;
- if (ni->ni_associd != 0 ||
- (vap->iv_opmode == IEEE80211_M_IBSS ||
- vap->iv_opmode == IEEE80211_M_AHDEMO)) {
- /*
- * Age/drain resources held by the station.
- */
- ic->ic_node_age(ni);
- /*
- * Probe the station before time it out. We
- * send a null data frame which may not be
- * universally supported by drivers (need it
- * for ps-poll support so it should be...).
- *
- * XXX don't probe the station unless we've
- * received a frame from them (and have
- * some idea of the rates they are capable
- * of); this will get fixed more properly
- * soon with better handling of the rate set.
- */
- if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
- (0 < ni->ni_inact &&
- ni->ni_inact <= vap->iv_inact_probe) &&
- ni->ni_rates.rs_nrates != 0) {
- IEEE80211_NOTE(vap,
- IEEE80211_MSG_INACT | IEEE80211_MSG_NODE,
- ni, "%s",
- "probe station due to inactivity");
- /*
- * Grab a reference before unlocking the table
- * so the node cannot be reclaimed before we
- * send the frame. ieee80211_send_nulldata
- * understands we've done this and reclaims the
- * ref for us as needed.
- */
- ieee80211_ref_node(ni);
- IEEE80211_NODE_UNLOCK(nt);
- ieee80211_send_nulldata(ni);
- /* XXX stat? */
- goto restart;
- }
- }
if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
- ni->ni_inact <= 0) {
+ (0 < ni->ni_inact &&
+ ni->ni_inact <= vap->iv_inact_probe) &&
+ ni->ni_rates.rs_nrates != 0) {
IEEE80211_NOTE(vap,
- IEEE80211_MSG_INACT | IEEE80211_MSG_NODE, ni,
- "station timed out due to inactivity "
- "(refcnt %u)", ieee80211_node_refcnt(ni));
+ IEEE80211_MSG_INACT | IEEE80211_MSG_NODE,
+ ni, "%s",
+ "probe station due to inactivity");
/*
- * Send a deauthenticate frame and drop the station.
- * This is somewhat complicated due to reference counts
- * and locking. At this point a station will typically
- * have a reference count of 1. ieee80211_node_leave
- * will do a "free" of the node which will drop the
- * reference count. But in the meantime a reference
- * wil be held by the deauth frame. The actual reclaim
- * of the node will happen either after the tx is
- * completed or by ieee80211_node_leave.
- *
- * Separately we must drop the node lock before sending
- * in case the driver takes a lock, as this can result
- * in a LOR between the node lock and the driver lock.
+ * Grab a reference so the node cannot
+ * be reclaimed before we send the frame.
+ * ieee80211_send_nulldata understands
+ * we've done this and reclaims the
+ * ref for us as needed.
*/
+ /* XXX fix this (not required anymore). */
ieee80211_ref_node(ni);
- IEEE80211_NODE_UNLOCK(nt);
- if (ni->ni_associd != 0) {
- IEEE80211_SEND_MGMT(ni,
- IEEE80211_FC0_SUBTYPE_DEAUTH,
- IEEE80211_REASON_AUTH_EXPIRE);
- }
- ieee80211_node_leave(ni);
- ieee80211_free_node(ni);
- vap->iv_stats.is_node_timeout++;
- goto restart;
+ /* XXX useless */
+ ieee80211_send_nulldata(ni);
+ /* XXX stat? */
+ return;
}
}
- IEEE80211_NODE_UNLOCK(nt);
+ if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
+ ni->ni_inact <= 0) {
+ IEEE80211_NOTE(vap,
+ IEEE80211_MSG_INACT | IEEE80211_MSG_NODE, ni,
+ "station timed out due to inactivity "
+ "(refcnt %u)", ieee80211_node_refcnt(ni));
+ /*
+ * Send a deauthenticate frame and drop the station.
+ * This is somewhat complicated due to reference counts
+ * and locking. At this point a station will typically
+ * have a reference count of 2. ieee80211_node_leave
+ * will do a "free" of the node which will drop the
+ * reference count. But in the meantime a reference
+ * wil be held by the deauth frame. The actual reclaim
+ * of the node will happen either after the tx is
+ * completed or by ieee80211_node_leave.
+ */
+ if (ni->ni_associd != 0) {
+ IEEE80211_SEND_MGMT(ni,
+ IEEE80211_FC0_SUBTYPE_DEAUTH,
+ IEEE80211_REASON_AUTH_EXPIRE);
+ }
+ ieee80211_node_leave(ni);
+ vap->iv_stats.is_node_timeout++;
+ }
+}
+
+/*
+ * Timeout inactive stations and do related housekeeping.
+ */
+static void
+ieee80211_timeout_stations(struct ieee80211com *ic)
+{
+ struct ieee80211_node_table *nt = &ic->ic_sta;
- IEEE80211_NODE_ITERATE_UNLOCK(nt);
+ ieee80211_iterate_nodes(nt, timeout_stations, NULL);
}
/*
@@ -2247,23 +2221,12 @@ int
ieee80211_iterate_nt(struct ieee80211_node_table *nt,
struct ieee80211_node **ni_arr, uint16_t max_aid)
{
- u_int gen;
int i, j, ret;
struct ieee80211_node *ni;
- IEEE80211_NODE_ITERATE_LOCK(nt);
IEEE80211_NODE_LOCK(nt);
- gen = ++nt->nt_scangen;
i = ret = 0;
-
- /*
- * We simply assume here that since the node
- * scan generation doesn't change (as
- * we are holding both the node table and
- * node table iteration locks), we can simply
- * assign it to the node here.
- */
TAILQ_FOREACH(ni, &nt->nt_node, ni_list) {
if (i >= max_aid) {
ret = E2BIG;
@@ -2272,7 +2235,6 @@ ieee80211_iterate_nt(struct ieee80211_node_table *nt,
break;
}
ni_arr[i] = ieee80211_ref_node(ni);
- ni_arr[i]->ni_scangen = gen;
i++;
}
@@ -2287,7 +2249,6 @@ ieee80211_iterate_nt(struct ieee80211_node_table *nt,
* ieee80211_free_node().
*/
IEEE80211_NODE_UNLOCK(nt);
- IEEE80211_NODE_ITERATE_UNLOCK(nt);
/*
* If ret is non-zero, we hit some kind of error.
@@ -2362,8 +2323,8 @@ ieee80211_dump_node(struct ieee80211_node_table *nt, struct ieee80211_node *ni)
{
printf("0x%p: mac %s refcnt %d\n", ni,
ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni));
- printf("\tscangen %u authmode %u flags 0x%x\n",
- ni->ni_scangen, ni->ni_authmode, ni->ni_flags);
+ printf("\tauthmode %u flags 0x%x\n",
+ ni->ni_authmode, ni->ni_flags);
printf("\tassocid 0x%x txpower %u vlan %u\n",
ni->ni_associd, ni->ni_txpower, ni->ni_vlan);
printf("\ttxseq %u rxseq %u fragno %u rxfragstamp %u\n",
diff --git a/sys/net80211/ieee80211_node.h b/sys/net80211/ieee80211_node.h
index 9492fc0..7df0053 100644
--- a/sys/net80211/ieee80211_node.h
+++ b/sys/net80211/ieee80211_node.h
@@ -115,7 +115,6 @@ struct ieee80211_node {
TAILQ_ENTRY(ieee80211_node) ni_list; /* list of all nodes */
LIST_ENTRY(ieee80211_node) ni_hash; /* hash collision list */
u_int ni_refcnt; /* count of held references */
- u_int ni_scangen; /* gen# for timeout scan */
u_int ni_flags;
#define IEEE80211_NODE_AUTH 0x000001 /* authorized for data */
#define IEEE80211_NODE_QOS 0x000002 /* QoS enabled */
@@ -360,8 +359,6 @@ struct ieee80211_node_table {
struct ieee80211_node **nt_keyixmap; /* key ix -> node map */
int nt_keyixmax; /* keyixmap size */
const char *nt_name; /* table name for debug msgs */
- ieee80211_scan_lock_t nt_scanlock; /* on nt_scangen */
- u_int nt_scangen; /* gen# for iterators */
int nt_inact_init; /* initial node inact setting */
};
diff --git a/sys/net80211/ieee80211_superg.c b/sys/net80211/ieee80211_superg.c
index 5565e5e..d5d1c18 100644
--- a/sys/net80211/ieee80211_superg.c
+++ b/sys/net80211/ieee80211_superg.c
@@ -912,6 +912,12 @@ ieee80211_ff_node_init(struct ieee80211_node *ni)
ieee80211_ff_node_cleanup(ni);
}
+/*
+ * Note: this comlock acquisition LORs with the node lock:
+ *
+ * 1: sta_join1 -> NODE_LOCK -> node_free -> node_cleanup -> ff_node_cleanup -> COM_LOCK
+ * 2: TBD
+ */
void
ieee80211_ff_node_cleanup(struct ieee80211_node *ni)
{
OpenPOWER on IntegriCloud