author     attilio <attilio@FreeBSD.org>    2013-02-27 18:17:34 +0000
committer  attilio <attilio@FreeBSD.org>    2013-02-27 18:17:34 +0000
commit     52c57fbbdb554a7ce0cdbb6bf27051ef70834bdf (patch)
tree       d0908474209a17865e044675940a2f62f9ff2493
parent     c74a3afc6a5d7d1ced989c36d4ba0a7d2bbc43b9 (diff)
MFC
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.c                             |  12
-rw-r--r--  contrib/binutils/gas/config/tc-arm.c                               |   2
-rw-r--r--  libexec/rtld-elf/map_object.c                                      |   2
-rw-r--r--  sbin/geom/class/part/gpart.8                                       |   2
-rw-r--r--  sbin/tunefs/tunefs.c                                               |   2
-rw-r--r--  sys/arm/ti/ti_gpio.c                                               |  14
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c          | 381
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c         |  33
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h |  32
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h     |   7
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c              |   1
-rw-r--r--  sys/dev/ath/ath_hal/ah.c                                           |   4
-rw-r--r--  sys/dev/ath/ath_hal/ah.h                                           |   3
-rw-r--r--  sys/dev/ath/ath_rate/sample/sample.c                               | 132
-rw-r--r--  sys/dev/ath/if_ath.c                                               |  22
-rw-r--r--  sys/dev/ath/if_ath_tx_ht.c                                         |  23
-rw-r--r--  sys/dev/ath/if_athvar.h                                            |   4
-rw-r--r--  sys/dev/mfi/mfi.c                                                  | 316
-rw-r--r--  sys/dev/mfi/mfi_cam.c                                              |   8
-rw-r--r--  sys/dev/mfi/mfi_debug.c                                            |  22
-rw-r--r--  sys/dev/mfi/mfi_tbolt.c                                            | 395
-rw-r--r--  sys/dev/mfi/mfireg.h                                               |  12
-rw-r--r--  sys/dev/mfi/mfivar.h                                               |  48
-rw-r--r--  sys/dev/msk/if_msk.c                                               |   6
-rw-r--r--  sys/kern/vfs_bio.c                                                 |  19
-rw-r--r--  sys/sys/vnode.h                                                    |   1
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c                                            |  73
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c                                          |  23
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c                                           |  33
-rw-r--r--  sys/ufs/ufs/ufs_quota.c                                            |  23
30 files changed, 1075 insertions(+), 580 deletions(-)
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index bbaefaf..0238c65 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -545,7 +545,7 @@ static void
dump_metaslab_stats(metaslab_t *msp)
{
char maxbuf[32];
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
avl_tree_t *t = sm->sm_pp_root;
int free_pct = sm->sm_space * 100 / sm->sm_size;
@@ -561,7 +561,7 @@ dump_metaslab(metaslab_t *msp)
{
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
char freebuf[32];
@@ -2160,11 +2160,11 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- VERIFY(space_map_load(&msp->ms_map,
+ space_map_unload(msp->ms_map);
+ VERIFY(space_map_load(msp->ms_map,
&zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
spa->spa_meta_objset) == 0);
- msp->ms_map.sm_ppd = vd;
+ msp->ms_map->sm_ppd = vd;
mutex_exit(&msp->ms_lock);
}
}
@@ -2187,7 +2187,7 @@ zdb_leak_fini(spa_t *spa)
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
+ space_map_unload(msp->ms_map);
mutex_exit(&msp->ms_lock);
}
}
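The zdb hunks above track an API change made in metaslab_impl.h further down: ms_map is converted from a space_map_t embedded in the metaslab to a heap-allocated pointer, so every &msp->ms_map becomes msp->ms_map. A minimal userland sketch of the pattern, with kmem_zalloc() approximated by calloc(); the point of the indirection is that two maps can later be exchanged with a pointer swap (space_map_swap()) rather than copied node by node:

#include <stdlib.h>

typedef struct space_map { int sm_space; } space_map_t;	/* stub */

typedef struct metaslab {
	space_map_t *ms_map;		/* was: space_map_t ms_map; */
} metaslab_t;

static void
metaslab_map_init(metaslab_t *msp)
{
	/* kernel: kmem_zalloc(sizeof (space_map_t), KM_SLEEP) */
	msp->ms_map = calloc(1, sizeof (space_map_t));
}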
diff --git a/contrib/binutils/gas/config/tc-arm.c b/contrib/binutils/gas/config/tc-arm.c
index 0b18b5b..daaae00 100644
--- a/contrib/binutils/gas/config/tc-arm.c
+++ b/contrib/binutils/gas/config/tc-arm.c
@@ -3079,6 +3079,7 @@ s_arm_unwind_fnend (int ignored ATTRIBUTE_UNUSED)
record_alignment (now_seg, 2);
ptr = frag_more (8);
+ memset(ptr, 0, 8);
where = frag_now_fix () - 8;
/* Self relative offset of the function start. */
@@ -17350,6 +17351,7 @@ create_unwind_entry (int have_data)
/* Allocate the table entry. */
ptr = frag_more ((size << 2) + 4);
+ memset(ptr, 0, (size << 2) + 4);
where = frag_now_fix () - ((size << 2) + 4);
switch (unwind.personality_index)
diff --git a/libexec/rtld-elf/map_object.c b/libexec/rtld-elf/map_object.c
index 79e4132..0f75cca 100644
--- a/libexec/rtld-elf/map_object.c
+++ b/libexec/rtld-elf/map_object.c
@@ -175,7 +175,7 @@ map_object(int fd, const char *path, const struct stat *sb)
base_vaddr = trunc_page(segs[0]->p_vaddr);
base_vlimit = round_page(segs[nsegs]->p_vaddr + segs[nsegs]->p_memsz);
mapsize = base_vlimit - base_vaddr;
- base_addr = hdr->e_type == ET_EXEC ? (caddr_t) base_vaddr : NULL;
+ base_addr = (caddr_t) base_vaddr;
mapbase = mmap(base_addr, mapsize, PROT_NONE, MAP_ANON | MAP_PRIVATE |
MAP_NOCORE, -1, 0);
diff --git a/sbin/geom/class/part/gpart.8 b/sbin/geom/class/part/gpart.8
index 8843e53..ef83daa 100644
--- a/sbin/geom/class/part/gpart.8
+++ b/sbin/geom/class/part/gpart.8
@@ -583,7 +583,7 @@ The system partition for computers that use the Extensible Firmware
Interface (EFI).
In such cases, the GPT partitioning scheme is used and the
actual partition type for the system partition can also be specified as
-.Qq Li "!c12a7328-f81f-11d2-ba4b-00a0c93ec93ab" .
+.Qq Li "!c12a7328-f81f-11d2-ba4b-00a0c93ec93b" .
.It Cm freebsd
A
.Fx
diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c
index 688952f..39e08f7 100644
--- a/sbin/tunefs/tunefs.c
+++ b/sbin/tunefs/tunefs.c
@@ -671,7 +671,7 @@ journal_findfile(void)
return (ino);
}
} else {
- if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
+ if ((off_t)dp2->di_size >= lblktosize(&sblock, NDADDR)) {
warnx("ROOTINO extends beyond direct blocks.");
return (-1);
}
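The tunefs fix above is a one-character bug: the UFS2 branch of journal_findfile() read the root inode size through dp1, the UFS1 view of the inode, instead of dp2. A self-contained sketch of the failure mode, with deliberately simplified inode layouts (the real ones are in <ufs/ufs/dinode.h>):

#include <stdint.h>
#include <stdio.h>

struct ufs1_dinode { uint32_t di_size; };	/* simplified */
struct ufs2_dinode { uint64_t di_size; };	/* simplified */

union dinodep {
	struct ufs1_dinode dp1;
	struct ufs2_dinode dp2;
};

int
main(void)
{
	union dinodep ip = { .dp2 = { .di_size = 1ULL << 33 } };
	int is_ufs2 = 1;

	/* Reading dp1.di_size on a UFS2 filesystem misinterprets
	 * the on-disk layout; the fixed code selects by fs type. */
	uint64_t size = is_ufs2 ? ip.dp2.di_size : (uint64_t)ip.dp1.di_size;
	printf("%ju\n", (uintmax_t)size);
	return (0);
}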
diff --git a/sys/arm/ti/ti_gpio.c b/sys/arm/ti/ti_gpio.c
index 58de516..4edb10e 100644
--- a/sys/arm/ti/ti_gpio.c
+++ b/sys/arm/ti/ti_gpio.c
@@ -653,6 +653,9 @@ ti_gpio_attach(device_t dev)
struct ti_gpio_softc *sc = device_get_softc(dev);
unsigned int i;
int err = 0;
+ int pin;
+ uint32_t flags;
+ uint32_t reg_oe;
sc->sc_dev = dev;
@@ -720,6 +723,17 @@ ti_gpio_attach(device_t dev)
/* Disable interrupts for all pins */
ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE1, 0xffffffff);
ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE2, 0xffffffff);
+
+ /* Init OE register based on pads configuration */
+ reg_oe = 0xffffffff;
+ for (pin = 0; pin < 32; pin++) {
+ ti_scm_padconf_get_gpioflags(
+ PINS_PER_BANK*i + pin, &flags);
+ if (flags & GPIO_PIN_OUTPUT)
+ reg_oe &= ~(1U << pin);
+ }
+
+ ti_gpio_write_4(sc, i, TI_GPIO_OE, reg_oe);
}
}
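The new loop derives the bank's initial OE (output enable) value from the pad configuration instead of leaving the reset default in place. On these TI GPIO banks a 0 bit in GPIO_OE drives the pin as an output, so the computation starts from all-ones (every pin an input) and clears one bit per output pad. A standalone sketch of just that computation; GPIO_PIN_OUTPUT is the flag value from sys/gpio.h:

#include <stdint.h>

#define	GPIO_PIN_OUTPUT	0x00000002	/* from sys/gpio.h */

static uint32_t
compute_oe(const uint32_t pad_flags[32])
{
	uint32_t reg_oe = 0xffffffff;	/* reset default: all inputs */

	for (int pin = 0; pin < 32; pin++)
		if (pad_flags[pin] & GPIO_PIN_OUTPUT)
			reg_oe &= ~(1U << pin);	/* 0 = output */
	return (reg_oe);
}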
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
index e81dc02..d6651f9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
@@ -48,6 +48,14 @@ uint64_t metaslab_aliquot = 512ULL << 10;
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
+ * The in-core space map representation is more compact than its on-disk form.
+ * The zfs_condense_pct determines how much more compact the in-core
+ * space_map representation must be before we compact it on-disk.
+ * Values should be greater than or equal to 100.
+ */
+int zfs_condense_pct = 200;
+
+/*
* This value defines the number of allowed allocation failures per vdev.
* If a device reaches this threshold in a given txg then we consider skipping
* allocations on that device.
@@ -215,9 +223,9 @@ metaslab_compare(const void *x1, const void *x2)
/*
* If the weights are identical, use the offset to force uniqueness.
*/
- if (m1->ms_map.sm_start < m2->ms_map.sm_start)
+ if (m1->ms_map->sm_start < m2->ms_map->sm_start)
return (-1);
- if (m1->ms_map.sm_start > m2->ms_map.sm_start)
+ if (m1->ms_map->sm_start > m2->ms_map->sm_start)
return (1);
ASSERT3P(m1, ==, m2);
@@ -732,14 +740,15 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
* addition of new space; and for debugging, it ensures that we'd
* data fault on any attempt to use this metaslab before it's ready.
*/
- space_map_create(&msp->ms_map, start, size,
+ msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
+ space_map_create(msp->ms_map, start, size,
vd->vdev_ashift, &msp->ms_lock);
metaslab_group_add(mg, msp);
if (metaslab_debug && smo->smo_object != 0) {
mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops,
+ VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops,
SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
mutex_exit(&msp->ms_lock);
}
@@ -767,22 +776,27 @@ metaslab_fini(metaslab_t *msp)
metaslab_group_t *mg = msp->ms_group;
vdev_space_update(mg->mg_vd,
- -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size);
+ -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size);
metaslab_group_remove(mg, msp);
mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- space_map_destroy(&msp->ms_map);
+ space_map_unload(msp->ms_map);
+ space_map_destroy(msp->ms_map);
+ kmem_free(msp->ms_map, sizeof (*msp->ms_map));
for (int t = 0; t < TXG_SIZE; t++) {
- space_map_destroy(&msp->ms_allocmap[t]);
- space_map_destroy(&msp->ms_freemap[t]);
+ space_map_destroy(msp->ms_allocmap[t]);
+ space_map_destroy(msp->ms_freemap[t]);
+ kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t]));
+ kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t]));
}
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_destroy(&msp->ms_defermap[t]);
+ for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+ space_map_destroy(msp->ms_defermap[t]);
+ kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t]));
+ }
ASSERT0(msp->ms_deferspace);
@@ -801,7 +815,7 @@ static uint64_t
metaslab_weight(metaslab_t *msp)
{
metaslab_group_t *mg = msp->ms_group;
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
vdev_t *vd = mg->mg_vd;
uint64_t weight, space;
@@ -809,6 +823,16 @@ metaslab_weight(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
/*
+ * This vdev is in the process of being removed so there is nothing
+ * for us to do here.
+ */
+ if (vd->vdev_removing) {
+ ASSERT0(smo->smo_alloc);
+ ASSERT0(vd->vdev_ms_shift);
+ return (0);
+ }
+
+ /*
* The baseline weight is the metaslab's free space.
*/
space = sm->sm_size - smo->smo_alloc;
@@ -861,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg)
* Prefetch the next potential metaslabs
*/
for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
/* If we have reached our prefetch limit then we're done */
@@ -882,7 +906,7 @@ static int
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
{
metaslab_group_t *mg = msp->ms_group;
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -899,7 +923,7 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
return (error);
}
for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_walk(&msp->ms_defermap[t],
+ space_map_walk(msp->ms_defermap[t],
space_map_claim, sm);
}
@@ -930,12 +954,158 @@ metaslab_passivate(metaslab_t *msp, uint64_t size)
* this metaslab again. In that case, it had better be empty,
* or we would be leaving space on the table.
*/
- ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
+ ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0);
metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
}
/*
+ * Determine if the in-core space map representation can be condensed on-disk.
+ * We would like to use the following criteria to make our decision:
+ *
+ * 1. The size of the space map object should not dramatically increase as a
+ * result of writing out our in-core free map.
+ *
+ * 2. The minimal on-disk space map representation is zfs_condense_pct/100
+ * times the size of the in-core representation (i.e. zfs_condense_pct = 110
+ * and in-core = 1MB, minimal = 1.1MB).
+ *
+ * Checking the first condition is tricky since we don't want to walk
+ * the entire AVL tree calculating the estimated on-disk size. Instead we
+ * use the size-ordered AVL tree in the space map and calculate the
+ * size required for the largest segment in our in-core free map. If the
+ * size required to represent that segment on disk is larger than the space
+ * map object then we avoid condensing this map.
+ *
+ * To determine the second criterion we use a best-case estimate and assume
+ * each segment can be represented on-disk as a single 64-bit entry. We refer
+ * to this best-case estimate as the space map's minimal form.
+ */
+static boolean_t
+metaslab_should_condense(metaslab_t *msp)
+{
+ space_map_t *sm = msp->ms_map;
+ space_map_obj_t *smo = &msp->ms_smo_syncing;
+ space_seg_t *ss;
+ uint64_t size, entries, segsz;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+ ASSERT(sm->sm_loaded);
+
+ /*
+ * Use the sm_pp_root AVL tree, which is ordered by size, to obtain
+ * the largest segment in the in-core free map. If the tree is
+ * empty then we should condense the map.
+ */
+ ss = avl_last(sm->sm_pp_root);
+ if (ss == NULL)
+ return (B_TRUE);
+
+ /*
+ * Calculate the number of 64-bit entries this segment would
+ * require when written to disk. If this single segment would be
+ * larger on-disk than the entire current on-disk structure, then
+ * clearly condensing will increase the on-disk structure size.
+ */
+ size = (ss->ss_end - ss->ss_start) >> sm->sm_shift;
+ entries = size / (MIN(size, SM_RUN_MAX));
+ segsz = entries * sizeof (uint64_t);
+
+ return (segsz <= smo->smo_objsize &&
+ smo->smo_objsize >= (zfs_condense_pct *
+ sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100);
+}
+
+/*
+ * Condense the on-disk space map representation to its minimized form.
+ * The minimized form consists of a small number of allocations followed by
+ * the in-core free map.
+ */
+static void
+metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK];
+ space_map_t condense_map;
+ space_map_t *sm = msp->ms_map;
+ objset_t *mos = spa_meta_objset(spa);
+ space_map_obj_t *smo = &msp->ms_smo_syncing;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
+ ASSERT(sm->sm_loaded);
+
+ spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, "
+ "smo size %llu, segments %lu", txg,
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
+ smo->smo_objsize, avl_numnodes(&sm->sm_root));
+
+ /*
+ * Create a map that is 100% allocated. We remove segments
+ * that have been freed in this txg, any deferred frees that exist,
+ * and any allocation in the future. Removing segments should be
+ * a relatively inexpensive operation since we expect these maps to
+ * contain a small number of nodes.
+ */
+ space_map_create(&condense_map, sm->sm_start, sm->sm_size,
+ sm->sm_shift, sm->sm_lock);
+ space_map_add(&condense_map, condense_map.sm_start,
+ condense_map.sm_size);
+
+ /*
+ * Remove what's been freed in this txg from the condense_map.
+ * Since we're in sync_pass 1, we know that all the frees from
+ * this txg are in the freemap.
+ */
+ space_map_walk(freemap, space_map_remove, &condense_map);
+
+ for (int t = 0; t < TXG_DEFER_SIZE; t++)
+ space_map_walk(msp->ms_defermap[t],
+ space_map_remove, &condense_map);
+
+ for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
+ space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK],
+ space_map_remove, &condense_map);
+
+ /*
+ * We're about to drop the metaslab's lock, thus allowing
+ * other consumers to change its content. Set the
+ * space_map's sm_condensing flag to ensure that
+ * allocations on this metaslab do not occur while we're
+ * in the middle of committing it to disk. This is only critical
+ * for the ms_map as all other space_maps use per txg
+ * views of their content.
+ */
+ sm->sm_condensing = B_TRUE;
+
+ mutex_exit(&msp->ms_lock);
+ space_map_truncate(smo, mos, tx);
+ mutex_enter(&msp->ms_lock);
+
+ /*
+ * While we would ideally like to create a space_map representation
+ * that consists only of allocation records, doing so can be
+ * prohibitively expensive because the in-core free map can be
+ * large, and therefore computationally expensive to subtract
+ * from the condense_map. Instead we sync out two maps, a cheap
+ * allocation only map followed by the in-core free map. While not
+ * optimal, this is typically close to optimal, and much cheaper to
+ * compute.
+ */
+ space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx);
+ space_map_vacate(&condense_map, NULL, NULL);
+ space_map_destroy(&condense_map);
+
+ space_map_sync(sm, SM_FREE, smo, mos, tx);
+ sm->sm_condensing = B_FALSE;
+
+ spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, "
+ "smo size %llu", txg,
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
+ smo->smo_objsize);
+}
+
+/*
* Write a metaslab to disk in the context of the specified transaction group.
*/
void
@@ -944,17 +1114,29 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
objset_t *mos = spa_meta_objset(spa);
- space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
- space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- space_map_t *sm = &msp->ms_map;
+ space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK];
+ space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK];
+ space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo_syncing;
dmu_buf_t *db;
dmu_tx_t *tx;
ASSERT(!vd->vdev_ishole);
- if (allocmap->sm_space == 0 && freemap->sm_space == 0)
+ /*
+ * This metaslab has just been added so there's no work to do now.
+ */
+ if (*freemap == NULL) {
+ ASSERT3P(allocmap, ==, NULL);
+ return;
+ }
+
+ ASSERT3P(allocmap, !=, NULL);
+ ASSERT3P(*freemap, !=, NULL);
+ ASSERT3P(*freed_map, !=, NULL);
+
+ if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0)
return;
/*
@@ -982,49 +1164,36 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
mutex_enter(&msp->ms_lock);
- space_map_walk(freemap, space_map_add, freed_map);
-
- if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
- 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
- /*
- * The in-core space map representation is twice as compact
- * as the on-disk one, so it's time to condense the latter
- * by generating a pure allocmap from first principles.
- *
- * This metaslab is 100% allocated,
- * minus the content of the in-core map (sm),
- * minus what's been freed this txg (freed_map),
- * minus deferred frees (ms_defermap[]),
- * minus allocations from txgs in the future
- * (because they haven't been committed yet).
- */
- space_map_vacate(allocmap, NULL, NULL);
- space_map_vacate(freemap, NULL, NULL);
-
- space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);
-
- space_map_walk(sm, space_map_remove, allocmap);
- space_map_walk(freed_map, space_map_remove, allocmap);
+ if (sm->sm_loaded && spa_sync_pass(spa) == 1 &&
+ metaslab_should_condense(msp)) {
+ metaslab_condense(msp, txg, tx);
+ } else {
+ space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
+ space_map_sync(*freemap, SM_FREE, smo, mos, tx);
+ }
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_walk(&msp->ms_defermap[t],
- space_map_remove, allocmap);
+ space_map_vacate(allocmap, NULL, NULL);
- for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
- space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
- space_map_remove, allocmap);
-
- mutex_exit(&msp->ms_lock);
- space_map_truncate(smo, mos, tx);
- mutex_enter(&msp->ms_lock);
+ /*
+ * For sync pass 1, we avoid walking the entire space map and
+ * instead will just swap the pointers for freemap and
+ * freed_map. We can safely do this since the freed_map is
+ * guaranteed to be empty on the initial pass.
+ */
+ if (spa_sync_pass(spa) == 1) {
+ ASSERT0((*freed_map)->sm_space);
+ ASSERT0(avl_numnodes(&(*freed_map)->sm_root));
+ space_map_swap(freemap, freed_map);
+ } else {
+ space_map_vacate(*freemap, space_map_add, *freed_map);
}
- space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
- space_map_sync(freemap, SM_FREE, smo, mos, tx);
+ ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space);
+ ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space);
mutex_exit(&msp->ms_lock);
- VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, sizeof (*smo));
bcopy(smo, db->db_data, sizeof (*smo));
@@ -1042,9 +1211,9 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
{
space_map_obj_t *smo = &msp->ms_smo;
space_map_obj_t *smosync = &msp->ms_smo_syncing;
- space_map_t *sm = &msp->ms_map;
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
+ space_map_t *sm = msp->ms_map;
+ space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ space_map_t **defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
metaslab_group_t *mg = msp->ms_group;
vdev_t *vd = mg->mg_vd;
int64_t alloc_delta, defer_delta;
@@ -1055,40 +1224,57 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
/*
* If this metaslab is just becoming available, initialize its
- * allocmaps and freemaps and add its capacity to the vdev.
+ * allocmaps, freemaps, and defermap and add its capacity to the vdev.
*/
- if (freed_map->sm_size == 0) {
+ if (*freed_map == NULL) {
+ ASSERT(*defer_map == NULL);
for (int t = 0; t < TXG_SIZE; t++) {
- space_map_create(&msp->ms_allocmap[t], sm->sm_start,
+ msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_allocmap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
- space_map_create(&msp->ms_freemap[t], sm->sm_start,
+ msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_freemap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
}
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_create(&msp->ms_defermap[t], sm->sm_start,
+ for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+ msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_defermap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
+ }
+
+ freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
vdev_space_update(vd, 0, 0, sm->sm_size);
}
alloc_delta = smosync->smo_alloc - smo->smo_alloc;
- defer_delta = freed_map->sm_space - defer_map->sm_space;
+ defer_delta = (*freed_map)->sm_space - (*defer_map)->sm_space;
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
- ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
- ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
+ ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0);
+ ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0);
/*
* If there's a space_map_load() in progress, wait for it to complete
* so that we have a consistent view of the in-core space map.
- * Then, add defer_map (oldest deferred frees) to this map and
- * transfer freed_map (this txg's frees) to defer_map.
*/
space_map_load_wait(sm);
- space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
- space_map_vacate(freed_map, space_map_add, defer_map);
+
+ /*
+ * Move the frees from the defer_map to this map (if it's loaded).
+ * Swap the freed_map and the defer_map -- this is safe to do
+ * because we've just emptied out the defer_map.
+ */
+ space_map_vacate(*defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
+ ASSERT0((*defer_map)->sm_space);
+ ASSERT0(avl_numnodes(&(*defer_map)->sm_root));
+ space_map_swap(freed_map, defer_map);
*smo = *smosync;
@@ -1112,7 +1298,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
int evictable = 1;
for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
- if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
+ if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space)
evictable = 0;
if (evictable && !metaslab_debug)
@@ -1137,7 +1323,7 @@ metaslab_sync_reassess(metaslab_group_t *mg)
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
- if (msp->ms_map.sm_start > mg->mg_bonus_area)
+ if (msp->ms_map->sm_start > mg->mg_bonus_area)
break;
mutex_enter(&msp->ms_lock);
@@ -1158,7 +1344,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
{
uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
- uint64_t start = msp->ms_map.sm_start >> ms_shift;
+ uint64_t start = msp->ms_map->sm_start >> ms_shift;
if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
return (1ULL << 63);
@@ -1206,6 +1392,13 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
mutex_exit(&mg->mg_lock);
return (-1ULL);
}
+
+ /*
+ * If the selected metaslab is condensing, skip it.
+ */
+ if (msp->ms_map->sm_condensing)
+ continue;
+
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
break;
@@ -1271,20 +1464,30 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
continue;
}
- if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
+ /*
+ * If this metaslab is currently condensing then pick again as
+ * we can't manipulate this metaslab until it's committed
+ * to disk.
+ */
+ if (msp->ms_map->sm_condensing) {
+ mutex_exit(&msp->ms_lock);
+ continue;
+ }
+
+ if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL)
break;
atomic_inc_64(&mg->mg_alloc_failures);
- metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
+ metaslab_passivate(msp, space_map_maxsize(msp->ms_map));
mutex_exit(&msp->ms_lock);
}
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize);
mutex_exit(&msp->ms_lock);
@@ -1516,13 +1719,13 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
mutex_enter(&msp->ms_lock);
if (now) {
- space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
+ space_map_remove(msp->ms_allocmap[txg & TXG_MASK],
offset, size);
- space_map_free(&msp->ms_map, offset, size);
+ space_map_free(msp->ms_map, offset, size);
} else {
- if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
+ space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size);
}
mutex_exit(&msp->ms_lock);
@@ -1557,10 +1760,10 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
mutex_enter(&msp->ms_lock);
- if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
+ if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded)
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
- if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
+ if (error == 0 && !space_map_contains(msp->ms_map, offset, size))
error = ENOENT;
if (error || txg == 0) { /* txg == 0 indicates dry run */
@@ -1568,12 +1771,12 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
return (error);
}
- space_map_claim(&msp->ms_map, offset, size);
+ space_map_claim(msp->ms_map, offset, size);
if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size);
}
mutex_exit(&msp->ms_lock);
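The condensing arithmetic in metaslab_should_condense() is easy to check in isolation. The sketch below reproduces both tests from the hunk above with made-up inputs; SM_RUN_MAX is assumed to be the 15-bit run limit of an on-disk space map entry:

#include <stdint.h>
#include <stdio.h>

#define	SM_RUN_MAX	((1ULL << 15) - 1)	/* assumed run limit */

static int zfs_condense_pct = 200;

static int
should_condense(uint64_t seg_len, int sm_shift, uint64_t smo_objsize,
    uint64_t numnodes)
{
	/* seg_len is nonzero: the caller found a largest segment. */
	uint64_t size = seg_len >> sm_shift;
	uint64_t entries = size / (size < SM_RUN_MAX ? size : SM_RUN_MAX);
	uint64_t segsz = entries * sizeof (uint64_t);

	return (segsz <= smo_objsize && smo_objsize >=
	    (zfs_condense_pct * sizeof (uint64_t) * numnodes) / 100);
}

int
main(void)
{
	/* 1000 segments: the minimal form is 8000 bytes, so with the
	 * default zfs_condense_pct = 200 a 20000-byte object is past
	 * the 16000-byte threshold; prints 1. */
	printf("%d\n", should_condense(1ULL << 20, 9, 20000, 1000));
	return (0);
}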
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
index bebb0f3..190fefe 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
@@ -114,6 +114,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
int merge_before, merge_after;
ASSERT(MUTEX_HELD(sm->sm_lock));
+ VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY3U(start, >=, sm->sm_start);
VERIFY3U(end, <=, sm->sm_start + sm->sm_size);
@@ -198,6 +199,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
int left_over, right_over;
ASSERT(MUTEX_HELD(sm->sm_lock));
+ VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
@@ -267,6 +269,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
}
void
+space_map_swap(space_map_t **msrc, space_map_t **mdst)
+{
+ space_map_t *sm;
+
+ ASSERT(MUTEX_HELD((*msrc)->sm_lock));
+ ASSERT0((*mdst)->sm_space);
+ ASSERT0(avl_numnodes(&(*mdst)->sm_root));
+
+ sm = *msrc;
+ *msrc = *mdst;
+ *mdst = sm;
+}
+
+void
space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
{
space_seg_t *ss;
@@ -447,9 +463,9 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
{
spa_t *spa = dmu_objset_spa(os);
- void *cookie = NULL;
+ avl_tree_t *t = &sm->sm_root;
space_seg_t *ss;
- uint64_t bufsize, start, size, run_len, delta, sm_space;
+ uint64_t bufsize, start, size, run_len, total, sm_space, nodes;
uint64_t *entry, *entry_map, *entry_map_end;
ASSERT(MUTEX_HELD(sm->sm_lock));
@@ -478,13 +494,14 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
- delta = 0;
+ total = 0;
+ nodes = avl_numnodes(&sm->sm_root);
sm_space = sm->sm_space;
- while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
+ for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) {
size = ss->ss_end - ss->ss_start;
start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
- delta += size;
+ total += size;
size >>= sm->sm_shift;
while (size) {
@@ -506,7 +523,6 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
start += run_len;
size -= run_len;
}
- kmem_cache_free(space_seg_cache, ss);
}
if (entry != entry_map) {
@@ -522,12 +538,11 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
* Ensure that the space_map's accounting wasn't changed
* while we were in the middle of writing it out.
*/
+ VERIFY3U(nodes, ==, avl_numnodes(&sm->sm_root));
VERIFY3U(sm->sm_space, ==, sm_space);
+ VERIFY3U(sm->sm_space, ==, total);
zio_buf_free(entry_map, bufsize);
-
- sm->sm_space -= delta;
- VERIFY0(sm->sm_space);
}
void
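space_map_sync() now walks the AVL tree without destroying it, which lets metaslab_sync() hand the just-synced freemap over as the next txg's freed_map with the new space_map_swap() instead of vacating it entry by entry. A standalone sketch of the swap semantics (the kernel version also asserts the destination is empty):

#include <stdio.h>

typedef struct space_map { const char *sm_name; } space_map_t;

static void
space_map_swap(space_map_t **msrc, space_map_t **mdst)
{
	space_map_t *sm = *msrc;

	*msrc = *mdst;
	*mdst = sm;
}

int
main(void)
{
	space_map_t a = { "freemap" }, b = { "freed_map" };
	space_map_t *freemap = &a, *freed_map = &b;

	space_map_swap(&freemap, &freed_map);
	printf("%s %s\n", freemap->sm_name, freed_map->sm_name);
	return (0);
}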
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
index f1f1b38..138e14e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -66,20 +66,38 @@ struct metaslab_group {
};
/*
- * Each metaslab's free space is tracked in space map object in the MOS,
- * which is only updated in syncing context. Each time we sync a txg,
+ * Each metaslab maintains an in-core free map (ms_map) that contains the
+ * current list of free segments. As blocks are allocated, the allocated
+ * segment is removed from the ms_map and added to a per txg allocation map.
+ * As blocks are freed, they are added to the per txg free map. These per
+ * txg maps allow us to process all allocations and frees in syncing context
+ * where it is safe to update the on-disk space maps.
+ *
+ * Each metaslab's free space is tracked in a space map object in the MOS,
+ * which is only updated in syncing context. Each time we sync a txg,
* we append the allocs and frees from that txg to the space map object.
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
- * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
+ * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
+ *
+ * To load the in-core free map we read the space map object from disk.
+ * This object contains a series of alloc and free records that are
+ * combined to make up the list of all free segments in this metaslab. These
+ * segments are represented in-core by the ms_map and are stored in an
+ * AVL tree.
+ *
+ * As the space map object grows (as a result of the appends) it will
+ * eventually become space-inefficient. When the space map object is
+ * zfs_condense_pct/100 times the size of the minimal on-disk representation,
+ * we rewrite it in its minimized form.
*/
struct metaslab {
kmutex_t ms_lock; /* metaslab lock */
space_map_obj_t ms_smo; /* synced space map object */
space_map_obj_t ms_smo_syncing; /* syncing space map object */
- space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
- space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
- space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
- space_map_t ms_map; /* in-core free space map */
+ space_map_t *ms_allocmap[TXG_SIZE]; /* allocated this txg */
+ space_map_t *ms_freemap[TXG_SIZE]; /* freed this txg */
+ space_map_t *ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
+ space_map_t *ms_map; /* in-core free space map */
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
index 463b6bb..2da50fb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
@@ -40,17 +40,17 @@ extern "C" {
typedef struct space_map_ops space_map_ops_t;
typedef struct space_map {
- avl_tree_t sm_root; /* AVL tree of map segments */
+ avl_tree_t sm_root; /* offset-ordered segment AVL tree */
uint64_t sm_space; /* sum of all segments in the map */
uint64_t sm_start; /* start of map */
uint64_t sm_size; /* size of map */
uint8_t sm_shift; /* unit shift */
- uint8_t sm_pad[3]; /* unused */
uint8_t sm_loaded; /* map loaded? */
uint8_t sm_loading; /* map loading? */
+ uint8_t sm_condensing; /* map condensing? */
kcondvar_t sm_load_cv; /* map load completion */
space_map_ops_t *sm_ops; /* space map block picker ops vector */
- avl_tree_t *sm_pp_root; /* picker-private AVL tree */
+ avl_tree_t *sm_pp_root; /* size-ordered, picker-private tree */
void *sm_ppd; /* picker-private data */
kmutex_t *sm_lock; /* pointer to lock that protects map */
} space_map_t;
@@ -149,6 +149,7 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern boolean_t space_map_contains(space_map_t *sm,
uint64_t start, uint64_t size);
+extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_walk(space_map_t *sm,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 59b461b..be5b0bf 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -1847,6 +1847,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
space_map_truncate(smo, mos, tx);
space_map_sync(&smsync, SM_ALLOC, smo, mos, tx);
+ space_map_vacate(&smsync, NULL, NULL);
space_map_destroy(&smsync);
diff --git a/sys/dev/ath/ath_hal/ah.c b/sys/dev/ath/ath_hal/ah.c
index d1ce7a8..551c225 100644
--- a/sys/dev/ath/ath_hal/ah.c
+++ b/sys/dev/ath/ath_hal/ah.c
@@ -692,6 +692,10 @@ ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type,
return pCap->hal4AddrAggrSupport ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_EXT_CHAN_DFS:
return pCap->halExtChanDfsSupport ? HAL_OK : HAL_ENOTSUPP;
+ case HAL_CAP_RX_STBC:
+ return pCap->halRxStbcSupport ? HAL_OK : HAL_ENOTSUPP;
+ case HAL_CAP_TX_STBC:
+ return pCap->halTxStbcSupport ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_COMBINED_RADAR_RSSI:
return pCap->halUseCombinedRadarRssi ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_AUTO_SLEEP:
diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h
index 0e3d5ab..ca2e7ca 100644
--- a/sys/dev/ath/ath_hal/ah.h
+++ b/sys/dev/ath/ath_hal/ah.h
@@ -137,6 +137,9 @@ typedef enum {
HAL_CAP_RIFS_RX_ENABLED = 53,
HAL_CAP_BB_DFS_HANG = 54,
+ HAL_CAP_RX_STBC = 58,
+ HAL_CAP_TX_STBC = 59,
+
HAL_CAP_BT_COEX = 60, /* hardware is capable of bluetooth coexistence */
HAL_CAP_DYNAMIC_SMPS = 61, /* Dynamic MIMO Power Save hardware support */
diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c
index a7d6af6..b3f82fa 100644
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@@ -708,71 +708,6 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an,
s3code, sched->t3); /* series 3 */
}
-/*
- * Update the EWMA percentage.
- *
- * This is a simple hack to track an EWMA based on the current
- * rate scenario. For the rate codes which failed, this will
- * record a 0% against it. For the rate code which succeeded,
- * EWMA will record the nbad*100/nframes percentage against it.
- */
-static void
-update_ewma_stats(struct ath_softc *sc, struct ath_node *an,
- int frame_size,
- int rix0, int tries0,
- int rix1, int tries1,
- int rix2, int tries2,
- int rix3, int tries3,
- int short_tries, int tries, int status,
- int nframes, int nbad)
-{
- struct sample_node *sn = ATH_NODE_SAMPLE(an);
- struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc);
- const int size_bin = size_to_bin(frame_size);
- int tries_so_far;
- int pct;
- int rix = rix0;
-
- /* Calculate percentage based on current rate */
- if (nframes == 0)
- nframes = nbad = 1;
- pct = ((nframes - nbad) * 1000) / nframes;
-
- /* Figure out which rate index succeeded */
- tries_so_far = tries0;
-
- if (tries1 && tries_so_far < tries) {
- tries_so_far += tries1;
- rix = rix1;
- /* XXX bump ewma pct */
- }
-
- if (tries2 && tries_so_far < tries) {
- tries_so_far += tries2;
- rix = rix2;
- /* XXX bump ewma pct */
- }
-
- if (tries3 && tries_so_far < tries) {
- rix = rix3;
- /* XXX bump ewma pct */
- }
-
- /* rix is the successful rate, update EWMA for final rix */
- if (sn->stats[size_bin][rix].total_packets <
- ssc->smoothing_minpackets) {
- /* just average the first few packets */
- int a_pct = (sn->stats[size_bin][rix].packets_acked * 1000) /
- (sn->stats[size_bin][rix].total_packets);
- sn->stats[size_bin][rix].ewma_pct = a_pct;
- } else {
- /* use a ewma */
- sn->stats[size_bin][rix].ewma_pct =
- ((sn->stats[size_bin][rix].ewma_pct * ssc->smoothing_rate) +
- (pct * (100 - ssc->smoothing_rate))) / 100;
- }
-}
-
static void
update_stats(struct ath_softc *sc, struct ath_node *an,
int frame_size,
@@ -792,6 +727,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
const int size = bin_to_size(size_bin);
int tt, tries_so_far;
int is_ht40 = (an->an_node.ni_chw == 40);
+ int pct;
if (!IS_RATE_DEFINED(sn, rix0))
return;
@@ -865,6 +801,27 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
sn->stats[size_bin][rix0].last_tx = ticks;
sn->stats[size_bin][rix0].total_packets += nframes;
+ /* update EWMA for this rix */
+
+ /* Calculate percentage based on current rate */
+ if (nframes == 0)
+ nframes = nbad = 1;
+ pct = ((nframes - nbad) * 1000) / nframes;
+
+ if (sn->stats[size_bin][rix0].total_packets <
+ ssc->smoothing_minpackets) {
+ /* just average the first few packets */
+ int a_pct = (sn->stats[size_bin][rix0].packets_acked * 1000) /
+ (sn->stats[size_bin][rix0].total_packets);
+ sn->stats[size_bin][rix0].ewma_pct = a_pct;
+ } else {
+ /* use a ewma */
+ sn->stats[size_bin][rix0].ewma_pct =
+ ((sn->stats[size_bin][rix0].ewma_pct * ssc->smoothing_rate) +
+ (pct * (100 - ssc->smoothing_rate))) / 100;
+ }
+
if (rix0 == sn->current_sample_rix[size_bin]) {
IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
&an->an_node,
@@ -907,6 +864,11 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries = ts->ts_shortretry;
long_tries = ts->ts_longretry + 1;
+ if (nframes == 0) {
+ device_printf(sc->sc_dev, "%s: nframes=0?\n", __func__);
+ return;
+ }
+
if (frame_size == 0) /* NB: should not happen */
frame_size = 1500;
@@ -950,13 +912,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
0, 0,
short_tries, long_tries, status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- final_rix, long_tries,
- 0, 0,
- 0, 0,
- 0, 0,
- short_tries, long_tries, status,
- nframes, nbad);
} else {
int finalTSIdx = ts->ts_finaltsi;
@@ -1008,15 +963,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
long_tries > rc[0].tries,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[0].rix, rc[0].tries,
- rc[1].rix, rc[1].tries,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- short_tries, long_tries,
- long_tries > rc[0].tries,
- nframes, nbad);
-
long_tries -= rc[0].tries;
}
@@ -1029,14 +975,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[1].rix, rc[1].tries,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
long_tries -= rc[1].tries;
}
@@ -1049,14 +987,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- 0, 0,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
long_tries -= rc[2].tries;
}
@@ -1069,14 +999,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[3].rix, rc[3].tries,
- 0, 0,
- 0, 0,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
}
}
}
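The sample.c change deletes update_ewma_stats() and performs the EWMA update inside update_stats() for the one rate index actually being credited. The blend itself is unchanged: percentages are tracked in tenths of a percent (0..1000) and mixed with the previous average. A standalone sketch; smoothing_rate = 95 is an assumed example value:

#include <stdio.h>

static int
ewma_update(int ewma_pct, int nframes, int nbad, int smoothing_rate)
{
	int pct;

	if (nframes == 0)
		nframes = nbad = 1;
	pct = ((nframes - nbad) * 1000) / nframes;	/* 0..1000 */

	return ((ewma_pct * smoothing_rate +
	    pct * (100 - smoothing_rate)) / 100);
}

int
main(void)
{
	/* 32-frame aggregate with 4 bad subframes: pct = 875, so an
	 * old average of 900 decays to (900*95 + 875*5)/100 = 898. */
	printf("%d\n", ewma_update(900, 32, 4, 95));
	return (0);
}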
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c
index a614d6f..fd1a7c3 100644
--- a/sys/dev/ath/if_ath.c
+++ b/sys/dev/ath/if_ath.c
@@ -781,6 +781,28 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
ic->ic_txstream = txs;
ic->ic_rxstream = rxs;
+ /*
+ * Setup TX and RX STBC based on what the HAL allows and
+ * the currently configured chainmask set.
+ * I.e., don't enable STBC TX if only one chain is enabled.
+ * STBC RX is fine on a single RX chain; it just won't
+ * provide any real benefit.
+ */
+ if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0,
+ NULL) == HAL_OK) {
+ sc->sc_rx_stbc = 1;
+ device_printf(sc->sc_dev,
+ "[HT] 1 stream STBC receive enabled\n");
+ ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM;
+ }
+ if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0,
+ NULL) == HAL_OK) {
+ sc->sc_tx_stbc = 1;
+ device_printf(sc->sc_dev,
+ "[HT] 1 stream STBC transmit enabled\n");
+ ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC;
+ }
+
(void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1,
&sc->sc_rts_aggr_limit);
if (sc->sc_rts_aggr_limit != (64 * 1024))
diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c
index c0e72ac..d382f8f 100644
--- a/sys/dev/ath/if_ath_tx_ht.c
+++ b/sys/dev/ath/if_ath_tx_ht.c
@@ -536,16 +536,29 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni,
series[i].RateFlags |= HAL_RATESERIES_HALFGI;
/*
- * XXX TODO: STBC if it's possible
+ * Setup rate and TX power cap for this series.
*/
+ series[i].Rate = rt->info[rc[i].rix].rateCode;
+ series[i].RateIndex = rc[i].rix;
+ series[i].tx_power_cap = 0x3f; /* XXX for now */
+
/*
- * XXX TODO: LDPC if it's possible
+ * If we have STBC TX enabled and the receiver
+ * can receive (at least) 1 stream STBC, AND it's
+ * MCS 0-7, AND we have at least two chains enabled,
+ * enable STBC.
*/
+ if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC &&
+ ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM &&
+ (sc->sc_cur_txchainmask > 1) &&
+ HT_RC_2_STREAMS(series[i].Rate) == 1) {
+ series[i].RateFlags |= HAL_RATESERIES_STBC;
+ }
- series[i].Rate = rt->info[rc[i].rix].rateCode;
- series[i].RateIndex = rc[i].rix;
- series[i].tx_power_cap = 0x3f; /* XXX for now */
+ /*
+ * XXX TODO: LDPC if it's possible
+ */
/*
* PktDuration doesn't include slot, ACK, RTS, etc timing -
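The STBC gate added to ath_rateseries_setup() requires all four conditions at once: local TX STBC capability, a peer that advertises at least one-stream RX STBC, two or more TX chains, and a single-stream MCS rate. A compile-only sketch of the predicate; the constants restate net80211/ath values and should be treated as assumptions here:

#include <stdint.h>

#define	IEEE80211_HTCAP_TXSTBC		0x0080	/* assumed value */
#define	IEEE80211_HTCAP_RXSTBC_1STREAM	0x0100	/* assumed value */
#define	HT_RC_2_STREAMS(rc)	((((rc) & 0x78) >> 3) + 1)	/* assumed */

static int
stbc_ok(uint32_t our_htcaps, uint32_t peer_htcap, int tx_chains,
    uint8_t ratecode)
{
	return ((our_htcaps & IEEE80211_HTCAP_TXSTBC) != 0 &&
	    (peer_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM) != 0 &&
	    tx_chains > 1 &&			/* need >= 2 chains */
	    HT_RC_2_STREAMS(ratecode) == 1);	/* MCS 0-7 */
}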
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index e8fdeff..42442de 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -567,7 +567,9 @@ struct ath_softc {
/*
* Second set of flags.
*/
- u_int32_t sc_use_ent : 1;
+ u_int32_t sc_use_ent : 1,
+ sc_rx_stbc : 1,
+ sc_tx_stbc : 1;
/*
* Enterprise mode configuration for AR9380 and later chipsets.
diff --git a/sys/dev/mfi/mfi.c b/sys/dev/mfi/mfi.c
index ed759fc..e799b9d 100644
--- a/sys/dev/mfi/mfi.c
+++ b/sys/dev/mfi/mfi.c
@@ -108,6 +108,7 @@ static void mfi_bio_complete(struct mfi_command *);
static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*);
static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*);
static int mfi_send_frame(struct mfi_softc *, struct mfi_command *);
+static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *);
static int mfi_abort(struct mfi_softc *, struct mfi_command **);
static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *);
static void mfi_timeout(void *);
@@ -132,24 +133,30 @@ static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm);
SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters");
static int mfi_event_locale = MFI_EVT_LOCALE_ALL;
TUNABLE_INT("hw.mfi.event_locale", &mfi_event_locale);
-SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RW, &mfi_event_locale,
- 0, "event message locale");
+SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale,
+ 0, "event message locale");
static int mfi_event_class = MFI_EVT_CLASS_INFO;
TUNABLE_INT("hw.mfi.event_class", &mfi_event_class);
-SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RW, &mfi_event_class,
- 0, "event message class");
+SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class,
+ 0, "event message class");
static int mfi_max_cmds = 128;
TUNABLE_INT("hw.mfi.max_cmds", &mfi_max_cmds);
-SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RD, &mfi_max_cmds,
- 0, "Max commands");
+SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds,
+ 0, "Max commands limit (-1 = controller limit)");
static int mfi_detect_jbod_change = 1;
TUNABLE_INT("hw.mfi.detect_jbod_change", &mfi_detect_jbod_change);
-SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RW,
+SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN,
&mfi_detect_jbod_change, 0, "Detect a change to a JBOD");
+int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS;
+TUNABLE_INT("hw.mfi.polled_cmd_timeout", &mfi_polled_cmd_timeout);
+SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN,
+ &mfi_polled_cmd_timeout, 0,
+ "Polled command timeout - used for firmware flash etc (in seconds)");
+
/* Management interface */
static d_open_t mfi_open;
static d_close_t mfi_close;
@@ -361,7 +368,7 @@ mfi_attach(struct mfi_softc *sc)
{
uint32_t status;
int error, commsz, framessz, sensesz;
- int frames, unit, max_fw_sge;
+ int frames, unit, max_fw_sge, max_fw_cmds;
uint32_t tb_mem_size = 0;
if (sc == NULL)
@@ -456,7 +463,14 @@ mfi_attach(struct mfi_softc *sc)
* instead of compile time.
*/
status = sc->mfi_read_fw_status(sc);
- sc->mfi_max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK;
+ max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK;
+ if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) {
+ device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n",
+ max_fw_cmds, mfi_max_cmds);
+ sc->mfi_max_fw_cmds = mfi_max_cmds;
+ } else {
+ sc->mfi_max_fw_cmds = max_fw_cmds;
+ }
max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16;
sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1));
@@ -464,7 +478,8 @@ mfi_attach(struct mfi_softc *sc)
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
mfi_tbolt_init_globals(sc);
- device_printf(sc->mfi_dev, "MaxCmd = %x MaxSgl = %x state = %x \n",
+ device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, "
+ "MaxSgl = %d, state = %#x\n", max_fw_cmds,
sc->mfi_max_fw_cmds, sc->mfi_max_sge, status);
tb_mem_size = mfi_tbolt_get_memory_requirement(sc);
@@ -503,8 +518,8 @@ mfi_attach(struct mfi_softc *sc)
0, /* flags */
NULL, NULL, /* lockfunc, lockarg */
&sc->mfi_tb_init_dmat)) {
- device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n");
- return (ENOMEM);
+ device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n");
+ return (ENOMEM);
}
if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init,
BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) {
@@ -683,11 +698,14 @@ mfi_attach(struct mfi_softc *sc)
/* ThunderBolt MFI_IOC2 INIT */
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
sc->mfi_disable_intr(sc);
+ mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) {
device_printf(sc->mfi_dev,
"TB Init has failed with error %d\n",error);
+ mtx_unlock(&sc->mfi_io_lock);
return error;
}
+ mtx_unlock(&sc->mfi_io_lock);
if ((error = mfi_tbolt_alloc_cmd(sc)) != 0)
return error;
@@ -723,10 +741,12 @@ mfi_attach(struct mfi_softc *sc)
"hook\n");
return (EINVAL);
}
+ mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_aen_setup(sc, 0), 0) != 0) {
mtx_unlock(&sc->mfi_io_lock);
return (error);
}
+ mtx_unlock(&sc->mfi_io_lock);
/*
* Register a shutdown handler.
@@ -766,7 +786,9 @@ mfi_attach(struct mfi_softc *sc)
mfi_timeout, sc);
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
+ mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_sync_map_info(sc);
+ mtx_unlock(&sc->mfi_io_lock);
}
return (0);
@@ -776,21 +798,16 @@ static int
mfi_alloc_commands(struct mfi_softc *sc)
{
struct mfi_command *cm;
- int i, ncmds;
+ int i, j;
/*
* XXX Should we allocate all the commands up front, or allocate on
* demand later like 'aac' does?
*/
- ncmds = MIN(mfi_max_cmds, sc->mfi_max_fw_cmds);
- if (bootverbose)
- device_printf(sc->mfi_dev, "Max fw cmds= %d, sizing driver "
- "pool to %d\n", sc->mfi_max_fw_cmds, ncmds);
-
- sc->mfi_commands = malloc(sizeof(struct mfi_command) * ncmds, M_MFIBUF,
- M_WAITOK | M_ZERO);
+ sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) *
+ sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO);
- for (i = 0; i < ncmds; i++) {
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
cm = &sc->mfi_commands[i];
cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames +
sc->mfi_cmd_size * i);
@@ -806,10 +823,20 @@ mfi_alloc_commands(struct mfi_softc *sc)
mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
+ } else {
+ device_printf(sc->mfi_dev, "Failed to allocate %d "
+ "command blocks, only allocated %d\n",
+ sc->mfi_max_fw_cmds, i);
+ for (j = 0; j < i; j++) {
+ cm = &sc->mfi_commands[j];
+ bus_dmamap_destroy(sc->mfi_buffer_dmat,
+ cm->cm_dmamap);
+ }
+ free(sc->mfi_commands, M_MFIBUF);
+ sc->mfi_commands = NULL;
+
+ return (ENOMEM);
}
- else
- break;
- sc->mfi_total_cmds++;
}
return (0);
@@ -834,6 +861,29 @@ mfi_release_command(struct mfi_command *cm)
cm->cm_sg->sg32[0].addr = 0;
}
+ /*
+ * Command may be on other queues e.g. busy queue depending on the
+ * flow of a previous call to mfi_mapcmd, so ensure it's dequeued
+ * properly
+ */
+ if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cm);
+ if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0)
+ mfi_remove_ready(cm);
+
+ /* We're not expecting it to be on any other queue but check */
+ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) {
+ panic("Command %p is still on another queue, flags = %#x",
+ cm, cm->cm_flags);
+ }
+
+ /* tbolt cleanup */
+ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) {
+ mfi_tbolt_return_cmd(cm->cm_sc,
+ cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1],
+ cm);
+ }
+
hdr_data = (uint32_t *)cm->cm_frame;
hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */
hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */
@@ -916,8 +966,10 @@ mfi_comms_init(struct mfi_softc *sc)
uint32_t context = 0;
mtx_lock(&sc->mfi_io_lock);
- if ((cm = mfi_dequeue_free(sc)) == NULL)
+ if ((cm = mfi_dequeue_free(sc)) == NULL) {
+ mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
+ }
/* Zero out the MFI frame */
context = cm->cm_frame->header.context;
@@ -946,15 +998,12 @@ mfi_comms_init(struct mfi_softc *sc)
cm->cm_data = NULL;
cm->cm_flags = MFI_CMD_POLLED;
- if ((error = mfi_mapcmd(sc, cm)) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
device_printf(sc->mfi_dev, "failed to send init command\n");
- mtx_unlock(&sc->mfi_io_lock);
- return (error);
- }
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
- return (0);
+ return (error);
}
static int
@@ -1005,7 +1054,7 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state)
struct mfi_command *cm = NULL;
int error;
- mtx_lock(&sc->mfi_io_lock);
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO,
(void **)log_state, sizeof(**log_state));
if (error)
@@ -1024,7 +1073,6 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state)
out:
if (cm)
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
return (error);
}
@@ -1037,32 +1085,32 @@ mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start)
int error = 0;
uint32_t seq;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
class_locale.members.reserved = 0;
class_locale.members.locale = mfi_event_locale;
class_locale.members.evt_class = mfi_event_class;
if (seq_start == 0) {
- error = mfi_get_log_state(sc, &log_state);
+ if ((error = mfi_get_log_state(sc, &log_state)) != 0)
+ goto out;
sc->mfi_boot_seq_num = log_state->boot_seq_num;
- if (error) {
- if (log_state)
- free(log_state, M_MFIBUF);
- return (error);
- }
/*
* Walk through any events that fired since the last
* shutdown.
*/
- mfi_parse_entries(sc, log_state->shutdown_seq_num,
- log_state->newest_seq_num);
+ if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num,
+ log_state->newest_seq_num)) != 0)
+ goto out;
seq = log_state->newest_seq_num;
} else
seq = seq_start;
- mfi_aen_register(sc, seq, class_locale.word);
+ error = mfi_aen_register(sc, seq, class_locale.word);
+out:
free(log_state, M_MFIBUF);
- return 0;
+ return (error);
}
int
@@ -1072,7 +1120,6 @@ mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm)
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
cm->cm_complete = NULL;
-
/*
* MegaCli can issue a DCMD of 0. In this case do nothing
* and return 0 to it as status
@@ -1100,12 +1147,13 @@ mfi_free(struct mfi_softc *sc)
if (sc->mfi_cdev != NULL)
destroy_dev(sc->mfi_cdev);
- if (sc->mfi_total_cmds != 0) {
- for (i = 0; i < sc->mfi_total_cmds; i++) {
+ if (sc->mfi_commands != NULL) {
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
cm = &sc->mfi_commands[i];
bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap);
}
free(sc->mfi_commands, M_MFIBUF);
+ sc->mfi_commands = NULL;
}
if (sc->mfi_intr)
@@ -1161,7 +1209,8 @@ mfi_free(struct mfi_softc *sc)
/* End LSIP200113393 */
/* ThunderBolt INIT packet memory Free */
if (sc->mfi_tb_init_busaddr != 0)
- bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap);
+ bus_dmamap_unload(sc->mfi_tb_init_dmat,
+ sc->mfi_tb_init_dmamap);
if (sc->mfi_tb_init != NULL)
bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init,
sc->mfi_tb_init_dmamap);
@@ -1178,16 +1227,14 @@ mfi_free(struct mfi_softc *sc)
sc->mfi_tb_ioc_init_dmamap);
if (sc->mfi_tb_ioc_init_dmat != NULL)
bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat);
- for (int i = 0; i < sc->mfi_max_fw_cmds; i++) {
- if (sc->mfi_cmd_pool_tbolt != NULL) {
+ if (sc->mfi_cmd_pool_tbolt != NULL) {
+ for (int i = 0; i < sc->mfi_max_fw_cmds; i++) {
if (sc->mfi_cmd_pool_tbolt[i] != NULL) {
free(sc->mfi_cmd_pool_tbolt[i],
M_MFIBUF);
sc->mfi_cmd_pool_tbolt[i] = NULL;
}
}
- }
- if (sc->mfi_cmd_pool_tbolt != NULL) {
free(sc->mfi_cmd_pool_tbolt, M_MFIBUF);
sc->mfi_cmd_pool_tbolt = NULL;
}
@@ -1252,16 +1299,14 @@ restart:
cm->cm_error = 0;
mfi_complete(sc, cm);
}
- if (++ci == (sc->mfi_max_fw_cmds + 1)) {
+ if (++ci == (sc->mfi_max_fw_cmds + 1))
ci = 0;
- }
}
sc->mfi_comms->hw_ci = ci;
/* Give defered I/O a chance to run */
- if (sc->mfi_flags & MFI_FLAGS_QFRZN)
- sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
+ sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
mfi_startio(sc);
mtx_unlock(&sc->mfi_io_lock);
@@ -1284,15 +1329,15 @@ mfi_shutdown(struct mfi_softc *sc)
int error;
- if (sc->mfi_aen_cm)
+ if (sc->mfi_aen_cm != NULL) {
sc->cm_aen_abort = 1;
- if (sc->mfi_aen_cm != NULL)
mfi_abort(sc, &sc->mfi_aen_cm);
+ }
- if (sc->mfi_map_sync_cm)
+ if (sc->mfi_map_sync_cm != NULL) {
sc->cm_map_abort = 1;
- if (sc->mfi_map_sync_cm != NULL)
mfi_abort(sc, &sc->mfi_map_sync_cm);
+ }
mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0);
@@ -1306,9 +1351,8 @@ mfi_shutdown(struct mfi_softc *sc)
cm->cm_flags = MFI_CMD_POLLED;
cm->cm_data = NULL;
- if ((error = mfi_mapcmd(sc, cm)) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
device_printf(sc->mfi_dev, "Failed to shutdown controller\n");
- }
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
@@ -1374,8 +1418,10 @@ mfi_syspdprobe(struct mfi_softc *sc)
TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) {
found = 0;
for (i = 0; i < pdlist->count; i++) {
- if (syspd->pd_id == pdlist->addr[i].device_id)
+ if (syspd->pd_id == pdlist->addr[i].device_id) {
found = 1;
+ break;
+ }
}
if (found == 0) {
printf("DELETE\n");
@@ -1628,6 +1674,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
struct mfi_evt_detail *ed = NULL;
int error = 0;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
current_aen.word = locale;
if (sc->mfi_aen_cm != NULL) {
prior_aen.word =
@@ -1646,13 +1694,10 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
}
}
- mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT,
(void **)&ed, sizeof(*ed));
- mtx_unlock(&sc->mfi_io_lock);
- if (error) {
+ if (error)
goto out;
- }
dcmd = &cm->cm_frame->dcmd;
((uint32_t *)&dcmd->mbox)[0] = seq;
@@ -1663,10 +1708,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
sc->last_seq_num = seq;
sc->mfi_aen_cm = cm;
- mtx_lock(&sc->mfi_io_lock);
mfi_enqueue_ready(cm);
mfi_startio(sc);
- mtx_unlock(&sc->mfi_io_lock);
out:
return (error);
@@ -1684,11 +1727,11 @@ mfi_aen_complete(struct mfi_command *cm)
sc = cm->cm_sc;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- hdr = &cm->cm_frame->header;
-
if (sc->mfi_aen_cm == NULL)
return;
+ hdr = &cm->cm_frame->header;
+
if (sc->cm_aen_abort ||
hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
sc->cm_aen_abort = 0;
@@ -1714,16 +1757,13 @@ mfi_aen_complete(struct mfi_command *cm)
}
free(cm->cm_data, M_MFIBUF);
- sc->mfi_aen_cm = NULL;
wakeup(&sc->mfi_aen_cm);
+ sc->mfi_aen_cm = NULL;
mfi_release_command(cm);
/* set it up again so the driver can catch more events */
- if (!aborted) {
- mtx_unlock(&sc->mfi_io_lock);
+ if (!aborted)
mfi_aen_setup(sc, seq);
- mtx_lock(&sc->mfi_io_lock);
- }
}
#define MAX_EVENTS 15
@@ -1737,6 +1777,8 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
union mfi_evt class_locale;
int error, i, seq, size;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
class_locale.members.reserved = 0;
class_locale.members.locale = mfi_event_locale;
class_locale.members.evt_class = mfi_event_class;
@@ -1748,13 +1790,10 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
return (ENOMEM);
for (seq = start_seq;;) {
- mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
free(el, M_MFIBUF);
- mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
}
- mtx_unlock(&sc->mfi_io_lock);
dcmd = &cm->cm_frame->dcmd;
bzero(dcmd->mbox, MFI_MBOX_SIZE);
@@ -1770,38 +1809,30 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
cm->cm_data = el;
cm->cm_len = size;
- mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev,
"Failed to get controller entries\n");
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
break;
}
- mtx_unlock(&sc->mfi_io_lock);
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) {
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
break;
}
if (dcmd->header.cmd_status != MFI_STAT_OK) {
device_printf(sc->mfi_dev,
"Error %d fetching controller entries\n",
dcmd->header.cmd_status);
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
+ error = EIO;
break;
}
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
for (i = 0; i < el->count; i++) {
/*
@@ -1817,15 +1848,13 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
else if (el->event[i].seq < start_seq)
break;
}
- mtx_lock(&sc->mfi_io_lock);
mfi_queue_evt(sc, &el->event[i]);
- mtx_unlock(&sc->mfi_io_lock);
}
seq = el->event[el->count - 1].seq + 1;
}
free(el, M_MFIBUF);
- return (0);
+ return (error);
}
static int
@@ -1942,11 +1971,12 @@ static int mfi_add_sys_pd(struct mfi_softc *sc, int id)
dcmd->mbox[0]=id;
dcmd->header.scsi_status = 0;
dcmd->header.pad0 = 0;
- if (mfi_mapcmd(sc, cm) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev,
"Failed to get physical drive info %d\n", id);
free(pd_info, M_MFIBUF);
- return (0);
+ mfi_release_command(cm);
+ return (error);
}
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTREAD);
@@ -2096,6 +2126,8 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio)
int flags = 0, blkcount = 0, readop;
uint8_t cdb_len;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if ((cm = mfi_dequeue_free(sc)) == NULL)
return (NULL);
@@ -2142,6 +2174,7 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio)
cm->cm_sg = &pass->sgl;
cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE;
cm->cm_flags = flags;
+
return (cm);
}
@@ -2154,6 +2187,8 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio)
uint32_t blkcount;
uint32_t context = 0;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if ((cm = mfi_dequeue_free(sc)) == NULL)
return (NULL);
@@ -2195,6 +2230,7 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio)
cm->cm_sg = &io->sgl;
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE;
cm->cm_flags = flags;
+
return (cm);
}
@@ -2212,11 +2248,14 @@ mfi_bio_complete(struct mfi_command *cm)
if ((hdr->cmd_status != MFI_STAT_OK) || (hdr->scsi_status != 0)) {
bio->bio_flags |= BIO_ERROR;
bio->bio_error = EIO;
- device_printf(sc->mfi_dev, "I/O error, status= %d "
- "scsi_status= %d\n", hdr->cmd_status, hdr->scsi_status);
+ device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, "
+ "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status);
mfi_print_sense(cm->cm_sc, cm->cm_sense);
} else if (cm->cm_error != 0) {
bio->bio_flags |= BIO_ERROR;
+ bio->bio_error = cm->cm_error;
+ device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n",
+ cm, cm->cm_error);
}
mfi_release_command(cm);
@@ -2252,6 +2291,7 @@ mfi_startio(struct mfi_softc *sc)
/* Send the command to the controller */
if (mfi_mapcmd(sc, cm) != 0) {
+ device_printf(sc->mfi_dev, "Failed to startio\n");
mfi_requeue_ready(cm);
break;
}
@@ -2280,10 +2320,7 @@ mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm)
return (0);
}
} else {
- if (sc->MFA_enabled)
- error = mfi_tbolt_send_frame(sc, cm);
- else
- error = mfi_send_frame(sc, cm);
+ error = mfi_send_frame(sc, cm);
}
return (error);
@@ -2297,18 +2334,28 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
union mfi_sgl *sgl;
struct mfi_softc *sc;
int i, j, first, dir;
- int sge_size;
+ int sge_size, locked;
cm = (struct mfi_command *)arg;
sc = cm->cm_sc;
hdr = &cm->cm_frame->header;
sgl = cm->cm_sg;
+	/*
+	 * We need to check if we have the lock, as this is an async
+	 * callback; even though our caller mfi_mapcmd asserts that
+	 * it holds the lock, there is no guarantee it hasn't been
+	 * dropped if bus_dmamap_load returned prior to our
+	 * completion.
+	 */
+ if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0)
+ mtx_lock(&sc->mfi_io_lock);
+
if (error) {
printf("error %d in callback\n", error);
cm->cm_error = error;
mfi_complete(sc, cm);
- return;
+ goto out;
}
/* Use IEEE sgl only for IO's on a SKINNY controller
* For other commands on a SKINNY controller use either
@@ -2380,10 +2427,17 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs);
cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE;
- if (sc->MFA_enabled)
- mfi_tbolt_send_frame(sc, cm);
- else
- mfi_send_frame(sc, cm);
+ if ((error = mfi_send_frame(sc, cm)) != 0) {
+ printf("error %d in callback from mfi_send_frame\n", error);
+ cm->cm_error = error;
+ mfi_complete(sc, cm);
+ goto out;
+ }
+
+out:
+ /* leave the lock in the state we found it */
+ if (locked == 0)
+ mtx_unlock(&sc->mfi_io_lock);
return;
}
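
The locking comment above describes a real hazard: bus_dmamap_load(9) may invoke the callback synchronously (with the caller still holding the lock) or later from interrupt context (lock not held). A minimal pthreads sketch of the "take the lock only if not already owned, and leave it as found" pattern, with a thread-local flag standing in for mtx_owned(); all names are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t io_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int io_lock_owned;	/* stands in for mtx_owned() */

static void
data_cb(void *arg)
{
	int locked;

	(void)arg;
	/* Take the lock only if the caller did not already hold it. */
	if ((locked = io_lock_owned) == 0) {
		pthread_mutex_lock(&io_lock);
		io_lock_owned = 1;
	}

	/* ... complete or dispatch the command under the lock ... */
	printf("callback ran; lock %s held on entry\n",
	    locked ? "already" : "not");

	/* Leave the lock in the state we found it. */
	if (locked == 0) {
		io_lock_owned = 0;
		pthread_mutex_unlock(&io_lock);
	}
}

int
main(void)
{
	/* Synchronous case: caller holds the lock across the callback. */
	pthread_mutex_lock(&io_lock);
	io_lock_owned = 1;
	data_cb(NULL);
	io_lock_owned = 0;
	pthread_mutex_unlock(&io_lock);

	/* Asynchronous case: the callback must lock for itself. */
	data_cb(NULL);
	return (0);
}
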
@@ -2391,8 +2445,26 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
static int
mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
{
+ int error;
+
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
+ if (sc->MFA_enabled)
+ error = mfi_tbolt_send_frame(sc, cm);
+ else
+ error = mfi_std_send_frame(sc, cm);
+
+ if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cm);
+
+ return (error);
+}
+
+static int
+mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
+{
struct mfi_frame_header *hdr;
- int tm = MFI_POLL_TIMEOUT_SECS * 1000;
+ int tm = mfi_polled_cmd_timeout * 1000;
hdr = &cm->cm_frame->header;
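
The refactor above removes the MFA_enabled if/else from every call site: mfi_send_frame() is now the single dispatch point between the ThunderBolt and standard submit paths, and it also performs the common failure cleanup. A compilable sketch of that shape (illustrative names, not the driver API):

#include <stdio.h>

struct softc {
	int	mfa_enabled;	/* ThunderBolt path in use? */
};

static int
tbolt_send(struct softc *sc)
{
	(void)sc;
	return (0);	/* pretend the ThunderBolt submit succeeded */
}

static int
std_send(struct softc *sc)
{
	(void)sc;
	return (0);	/* pretend the legacy submit succeeded */
}

static int
send_frame(struct softc *sc)
{
	int error;

	/* One dispatch point instead of if/else at every call site. */
	error = sc->mfa_enabled ? tbolt_send(sc) : std_send(sc);
	if (error != 0) {
		/*
		 * Common failure cleanup would go here, e.g. removing
		 * the command from the busy queue.
		 */
	}
	return (error);
}

int
main(void)
{
	struct softc sc = { .mfa_enabled = 1 };

	printf("send_frame: %d\n", send_frame(&sc));
	return (0);
}
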
@@ -2446,6 +2518,7 @@ void
mfi_complete(struct mfi_softc *sc, struct mfi_command *cm)
{
int dir;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) {
dir = 0;
@@ -2473,11 +2546,12 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
{
struct mfi_command *cm;
struct mfi_abort_frame *abort;
- int i = 0;
+ int i = 0, error;
uint32_t context = 0;
mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
+ mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
}
@@ -2497,7 +2571,8 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
cm->cm_data = NULL;
cm->cm_flags = MFI_CMD_POLLED;
- mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+ device_printf(sc->mfi_dev, "failed to abort command\n");
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
@@ -2513,7 +2588,7 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
mtx_unlock(&sc->mfi_io_lock);
}
- return (0);
+ return (error);
}
int
@@ -2551,7 +2626,8 @@ mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt,
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT;
- error = mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+ device_printf(sc->mfi_dev, "failed dump blocks\n");
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
@@ -2594,7 +2670,8 @@ mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt,
cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI;
- error = mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+ device_printf(sc->mfi_dev, "failed dump blocks\n");
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
@@ -3308,8 +3385,10 @@ out:
}
case MFI_SET_AEN:
aen = (struct mfi_ioc_aen *)arg;
+ mtx_lock(&sc->mfi_io_lock);
error = mfi_aen_register(sc, aen->aen_seq_num,
aen->aen_class_locale);
+ mtx_unlock(&sc->mfi_io_lock);
break;
case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */
@@ -3638,7 +3717,7 @@ mfi_dump_all(void)
deadline = time_uptime - MFI_CMD_TIMEOUT;
mtx_lock(&sc->mfi_io_lock);
TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) {
- if (cm->cm_timestamp < deadline) {
+ if (cm->cm_timestamp <= deadline) {
device_printf(sc->mfi_dev,
"COMMAND %p TIMEOUT AFTER %d SECONDS\n",
cm, (int)(time_uptime - cm->cm_timestamp));
@@ -3649,7 +3728,7 @@ mfi_dump_all(void)
#if 0
if (timedout)
- MFI_DUMP_CMDS(SC);
+ MFI_DUMP_CMDS(sc);
#endif
mtx_unlock(&sc->mfi_io_lock);
@@ -3662,7 +3741,7 @@ static void
mfi_timeout(void *data)
{
struct mfi_softc *sc = (struct mfi_softc *)data;
- struct mfi_command *cm;
+ struct mfi_command *cm, *tmp;
time_t deadline;
int timedout = 0;
@@ -3674,10 +3753,10 @@ mfi_timeout(void *data)
}
}
mtx_lock(&sc->mfi_io_lock);
- TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) {
+ TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) {
if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm)
continue;
- if (cm->cm_timestamp < deadline) {
+ if (cm->cm_timestamp <= deadline) {
if (sc->adpreset != 0 && sc->issuepend_done == 0) {
cm->cm_timestamp = time_uptime;
} else {
@@ -3687,6 +3766,13 @@ mfi_timeout(void *data)
);
MFI_PRINT_CMD(cm);
MFI_VALIDATE_CMD(sc, cm);
+ /*
+ * Fail the command instead of leaving it on
+ * the queue where it could remain stuck forever
+ */
+ mfi_remove_busy(cm);
+ cm->cm_error = ETIMEDOUT;
+ mfi_complete(sc, cm);
timedout++;
}
}
@@ -3694,7 +3780,7 @@ mfi_timeout(void *data)
#if 0
if (timedout)
- MFI_DUMP_CMDS(SC);
+ MFI_DUMP_CMDS(sc);
#endif
mtx_unlock(&sc->mfi_io_lock);
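
mfi_timeout() now fails timed-out commands, which removes them from the busy queue mid-walk; that is only safe with the _SAFE traversal macro, which caches the next element before the loop body runs. A standalone example, assuming a BSD-style <sys/queue.h> that provides TAILQ_FOREACH_SAFE (FreeBSD's does):

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct cmd {
	int			id;
	int			timed_out;
	TAILQ_ENTRY(cmd)	link;
};
TAILQ_HEAD(cmdq, cmd);

int
main(void)
{
	struct cmdq busy = TAILQ_HEAD_INITIALIZER(busy);
	struct cmd *cm, *tmp;
	int i;

	for (i = 0; i < 4; i++) {
		if ((cm = calloc(1, sizeof(*cm))) == NULL)
			return (1);
		cm->id = i;
		cm->timed_out = (i % 2);
		TAILQ_INSERT_TAIL(&busy, cm, link);
	}

	/*
	 * The _SAFE variant caches the next element in 'tmp' before the
	 * body runs, so removing 'cm' (as mfi_remove_busy() does above)
	 * cannot invalidate the traversal.
	 */
	TAILQ_FOREACH_SAFE(cm, &busy, link, tmp) {
		if (cm->timed_out) {
			TAILQ_REMOVE(&busy, cm, link);
			printf("failed command %d\n", cm->id);
			free(cm);
		}
	}
	return (0);
}
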
diff --git a/sys/dev/mfi/mfi_cam.c b/sys/dev/mfi/mfi_cam.c
index 325b064..0ea2326 100644
--- a/sys/dev/mfi/mfi_cam.c
+++ b/sys/dev/mfi/mfi_cam.c
@@ -145,6 +145,7 @@ mfip_attach(device_t dev)
MFI_SCSI_MAX_CMDS, sc->devq);
if (sc->sim == NULL) {
cam_simq_free(sc->devq);
+ sc->devq = NULL;
device_printf(dev, "CAM SIM attach failed\n");
return (EINVAL);
}
@@ -155,7 +156,9 @@ mfip_attach(device_t dev)
if (xpt_bus_register(sc->sim, dev, 0) != 0) {
device_printf(dev, "XPT bus registration failed\n");
cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
cam_simq_free(sc->devq);
+ sc->devq = NULL;
mtx_unlock(&mfisc->mfi_io_lock);
return (EINVAL);
}
@@ -187,11 +190,14 @@ mfip_detach(device_t dev)
mtx_lock(&sc->mfi_sc->mfi_io_lock);
xpt_bus_deregister(cam_sim_path(sc->sim));
cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
mtx_unlock(&sc->mfi_sc->mfi_io_lock);
}
- if (sc->devq != NULL)
+ if (sc->devq != NULL) {
cam_simq_free(sc->devq);
+ sc->devq = NULL;
+ }
return (0);
}
diff --git a/sys/dev/mfi/mfi_debug.c b/sys/dev/mfi/mfi_debug.c
index 2e66e19..4aec4f7 100644
--- a/sys/dev/mfi/mfi_debug.c
+++ b/sys/dev/mfi/mfi_debug.c
@@ -57,14 +57,7 @@ __FBSDID("$FreeBSD$");
static void
mfi_print_frame_flags(device_t dev, uint32_t flags)
{
- device_printf(dev, "flags=%b\n", flags,
- "\20"
- "\1NOPOST"
- "\2SGL64"
- "\3SENSE64"
- "\4WRITE"
- "\5READ"
- "\6IEEESGL");
+ device_printf(dev, "flags=%b\n", flags, MFI_FRAME_FMT);
}
static void
@@ -205,16 +198,7 @@ mfi_print_cmd(struct mfi_command *cm)
device_printf(dev, "cm=%p index=%d total_frame_size=%d "
"extra_frames=%d\n", cm, cm->cm_index, cm->cm_total_frame_size,
cm->cm_extra_frames);
- device_printf(dev, "flags=%b\n", cm->cm_flags,
- "\20"
- "\1MAPPED"
- "\2DATAIN"
- "\3DATAOUT"
- "\4COMPLETED"
- "\5POLLED"
- "\6Q_FREE"
- "\7Q_READY"
- "\10Q_BUSY");
+ device_printf(dev, "flags=%b\n", cm->cm_flags, MFI_CMD_FLAGS_FMT);
switch (cm->cm_frame->header.cmd) {
case MFI_CMD_DCMD:
@@ -237,7 +221,7 @@ mfi_dump_cmds(struct mfi_softc *sc)
{
int i;
- for (i = 0; i < sc->mfi_total_cmds; i++)
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++)
mfi_print_generic_frame(sc, &sc->mfi_commands[i]);
}
diff --git a/sys/dev/mfi/mfi_tbolt.c b/sys/dev/mfi/mfi_tbolt.c
index cce63c0..9d29ea0 100644
--- a/sys/dev/mfi/mfi_tbolt.c
+++ b/sys/dev/mfi/mfi_tbolt.c
@@ -55,14 +55,12 @@ __FBSDID("$FreeBSD$");
#include <dev/mfi/mfi_ioctl.h>
#include <dev/mfi/mfivar.h>
-struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc);
+struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *);
union mfi_mpi2_request_descriptor *
mfi_tbolt_get_request_descriptor(struct mfi_softc *sc, uint16_t index);
void mfi_tbolt_complete_cmd(struct mfi_softc *sc);
int mfi_tbolt_build_io(struct mfi_softc *sc, struct mfi_command *mfi_cmd,
struct mfi_cmd_tbolt *cmd);
-static inline void mfi_tbolt_return_cmd(struct mfi_softc *sc,
- struct mfi_cmd_tbolt *cmd);
union mfi_mpi2_request_descriptor *mfi_tbolt_build_mpt_cmd(struct mfi_softc
*sc, struct mfi_command *cmd);
uint8_t
@@ -84,6 +82,15 @@ static void mfi_queue_map_sync(struct mfi_softc *sc);
#define MFI_FUSION_ENABLE_INTERRUPT_MASK (0x00000008)
+
+extern int mfi_polled_cmd_timeout;
+static int mfi_fw_reset_test = 0;
+#ifdef MFI_DEBUG
+TUNABLE_INT("hw.mfi.fw_reset_test", &mfi_fw_reset_test);
+SYSCTL_INT(_hw_mfi, OID_AUTO, fw_reset_test, CTLFLAG_RWTUN, &mfi_fw_reset_test,
+ 0, "Force a firmware reset condition");
+#endif
+
void
mfi_tbolt_enable_intr_ppc(struct mfi_softc *sc)
{
@@ -162,14 +169,14 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc)
while (!( HostDiag & DIAG_WRITE_ENABLE)) {
for (i = 0; i < 1000; i++);
HostDiag = (uint32_t)MFI_READ4(sc, MFI_HDR);
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, "
- "hostdiag=%x\n", retry, HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, "
+ "hostdiag=%#x\n", retry, HostDiag);
if (retry++ >= 100)
return 1;
}
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%x\n", HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%#x\n", HostDiag);
MFI_WRITE4(sc, MFI_HDR, (HostDiag | DIAG_RESET_ADAPTER));
@@ -181,8 +188,8 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc)
while (HostDiag & DIAG_RESET_ADAPTER) {
for (i = 0; i < 1000; i++) ;
HostDiag = (uint32_t)MFI_READ4(sc, MFI_RSR);
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, "
- "hostdiag=%x\n", retry, HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, "
+ "hostdiag=%#x\n", retry, HostDiag);
if (retry++ >= 1000)
return 1;
@@ -311,6 +318,8 @@ mfi_tbolt_init_desc_pool(struct mfi_softc *sc, uint8_t* mem_location,
sc->sg_frame_busaddr = sc->reply_frame_busaddr + offset;
/* initialize the last_reply_idx to 0 */
sc->last_reply_idx = 0;
+ MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1);
+ MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
offset = (sc->sg_frame_busaddr + (MEGASAS_MAX_SZ_CHAIN_FRAME *
sc->mfi_max_fw_cmds)) - sc->mfi_tb_busaddr;
if (offset > tbolt_contg_length)
@@ -327,30 +336,35 @@ int
mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
{
struct MPI2_IOC_INIT_REQUEST *mpi2IocInit;
- struct mfi_init_frame *mfi_init;
+ struct mfi_init_frame *mfi_init;
uintptr_t offset = 0;
bus_addr_t phyAddress;
MFI_ADDRESS *mfiAddressTemp;
- struct mfi_command *cm;
+ struct mfi_command *cm, cmd_tmp;
int error;
- mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
/* Check if initialization is already completed */
if (sc->MFA_enabled) {
+ device_printf(sc->mfi_dev, "tbolt_init already initialised!\n");
return 1;
}
- mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
- mtx_unlock(&sc->mfi_io_lock);
+ device_printf(sc->mfi_dev, "tbolt_init failed to get command "
+		    "entry!\n");
return (EBUSY);
}
+
+ cmd_tmp.cm_frame = cm->cm_frame;
+ cmd_tmp.cm_frame_busaddr = cm->cm_frame_busaddr;
+ cmd_tmp.cm_dmamap = cm->cm_dmamap;
+
cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_tb_init);
cm->cm_frame_busaddr = sc->mfi_tb_init_busaddr;
cm->cm_dmamap = sc->mfi_tb_init_dmamap;
cm->cm_frame->header.context = 0;
- cm->cm_sc = sc;
- cm->cm_index = 0;
/*
* Abuse the SG list area of the frame to hold the init_qinfo
@@ -358,6 +372,7 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
*/
mfi_init = &cm->cm_frame->init;
+ mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc;
bzero(mpi2IocInit, sizeof(struct MPI2_IOC_INIT_REQUEST));
mpi2IocInit->Function = MPI2_FUNCTION_IOC_INIT;
mpi2IocInit->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
@@ -411,23 +426,25 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev, "failed to send IOC init2 "
"command %d at %lx\n", error, (long)cm->cm_frame_busaddr);
- mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
- return (error);
+ goto out;
}
- mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
- if (mfi_init->header.cmd_status == 0) {
+ if (mfi_init->header.cmd_status == MFI_STAT_OK) {
sc->MFA_enabled = 1;
- }
- else {
- device_printf(sc->mfi_dev, "Init command Failed %x\n",
+ } else {
+ device_printf(sc->mfi_dev, "Init command Failed %#x\n",
mfi_init->header.cmd_status);
- return 1;
+ error = mfi_init->header.cmd_status;
+ goto out;
}
- return 0;
+out:
+ cm->cm_frame = cmd_tmp.cm_frame;
+ cm->cm_frame_busaddr = cmd_tmp.cm_frame_busaddr;
+ cm->cm_dmamap = cmd_tmp.cm_dmamap;
+ mfi_release_command(cm);
+
+ return (error);
}
@@ -447,13 +464,21 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc)
sc->request_desc_pool = malloc(sizeof(
union mfi_mpi2_request_descriptor) * sc->mfi_max_fw_cmds,
M_MFIBUF, M_NOWAIT|M_ZERO);
+
+ if (sc->request_desc_pool == NULL) {
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "memory for request_desc_pool\n");
+ return (ENOMEM);
+ }
+
sc->mfi_cmd_pool_tbolt = malloc(sizeof(struct mfi_cmd_tbolt*)
* sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO);
- if (!sc->mfi_cmd_pool_tbolt) {
- device_printf(sc->mfi_dev, "out of memory. Could not alloc "
- "memory for cmd_list_fusion\n");
- return 1;
+ if (sc->mfi_cmd_pool_tbolt == NULL) {
+ free(sc->request_desc_pool, M_MFIBUF);
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "memory for cmd_pool_tbolt\n");
+ return (ENOMEM);
}
for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
@@ -461,20 +486,24 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc)
struct mfi_cmd_tbolt),M_MFIBUF, M_NOWAIT|M_ZERO);
if (!sc->mfi_cmd_pool_tbolt[i]) {
- device_printf(sc->mfi_dev, "Could not alloc cmd list "
- "fusion\n");
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "cmd_pool_tbolt entry\n");
for (j = 0; j < i; j++)
free(sc->mfi_cmd_pool_tbolt[j], M_MFIBUF);
+ free(sc->request_desc_pool, M_MFIBUF);
+ sc->request_desc_pool = NULL;
free(sc->mfi_cmd_pool_tbolt, M_MFIBUF);
sc->mfi_cmd_pool_tbolt = NULL;
+
+ return (ENOMEM);
}
}
/*
* The first 256 bytes (SMID 0) is not used. Don't add to the cmd
- *list
+ * list
*/
io_req_base = sc->request_message_pool_align
+ MEGASAS_THUNDERBOLT_NEW_MSG_SIZE;
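
mfi_tbolt_alloc_cmd() above now unwinds on failure: when the i-th per-command allocation fails, the 0..i-1 entries already allocated and both containing arrays are freed before returning ENOMEM, so nothing leaks. A small userland model of the pattern (not the driver code):

#include <stdio.h>
#include <stdlib.h>

static int
alloc_pool(void ***poolp, size_t n, size_t objsize)
{
	void **pool;
	size_t i, j;

	if ((pool = calloc(n, sizeof(*pool))) == NULL)
		return (-1);
	for (i = 0; i < n; i++) {
		if ((pool[i] = calloc(1, objsize)) == NULL) {
			/* Unwind: free entries 0..i-1, then the array. */
			for (j = 0; j < i; j++)
				free(pool[j]);
			free(pool);
			return (-1);
		}
	}
	*poolp = pool;
	return (0);
}

int
main(void)
{
	void **pool;
	size_t i;

	if (alloc_pool(&pool, 128, 256) != 0)
		return (1);
	printf("pool allocated\n");
	for (i = 0; i < 128; i++)
		free(pool[i]);
	free(pool);
	return (0);
}
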
@@ -520,7 +549,8 @@ mfi_tbolt_reset(struct mfi_softc *sc)
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
fw_state = sc->mfi_read_fw_status(sc);
- if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT) {
+ if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT ||
+ mfi_fw_reset_test) {
if ((sc->disableOnlineCtrlReset == 0)
&& (sc->adpreset == 0)) {
device_printf(sc->mfi_dev, "Adapter RESET "
@@ -554,8 +584,7 @@ mfi_intr_tbolt(void *arg)
return;
mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_complete_cmd(sc);
- if (sc->mfi_flags & MFI_FLAGS_QFRZN)
- sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
+ sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
mfi_startio(sc);
mtx_unlock(&sc->mfi_io_lock);
return;
@@ -573,58 +602,63 @@ map_tbolt_cmd_status(struct mfi_command *mfi_cmd, uint8_t status,
uint8_t ext_status)
{
switch (status) {
- case MFI_STAT_OK:
- mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK;
- mfi_cmd->cm_error = MFI_STAT_OK;
- break;
-
- case MFI_STAT_SCSI_IO_FAILED:
- case MFI_STAT_LD_INIT_IN_PROGRESS:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->header.scsi_status = ext_status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.scsi_status
- = ext_status;
- break;
-
- case MFI_STAT_SCSI_DONE_WITH_ERROR:
- mfi_cmd->cm_frame->header.cmd_status = ext_status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status;
- break;
-
- case MFI_STAT_LD_OFFLINE:
- case MFI_STAT_DEVICE_NOT_FOUND:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- break;
-
- default:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- break;
- }
+ case MFI_STAT_OK:
+ mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK;
+ mfi_cmd->cm_error = MFI_STAT_OK;
+ break;
+
+ case MFI_STAT_SCSI_IO_FAILED:
+ case MFI_STAT_LD_INIT_IN_PROGRESS:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->header.scsi_status = ext_status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.scsi_status
+ = ext_status;
+ break;
+
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+ mfi_cmd->cm_frame->header.cmd_status = ext_status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status;
+ break;
+
+ case MFI_STAT_LD_OFFLINE:
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ break;
+
+ default:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ break;
+ }
}
/*
* mfi_tbolt_return_cmd - Return a cmd to free command pool
* @instance: Adapter soft state
- * @cmd: Command packet to be returned to free command pool
+ * @tbolt_cmd: Tbolt command packet to be returned to free command pool
+ * @mfi_cmd: Owning MFI command packet
*/
-static inline void
-mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *cmd)
+void
+mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *tbolt_cmd,
+ struct mfi_command *mfi_cmd)
{
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- cmd->sync_cmd_idx = sc->mfi_max_fw_cmds;
- TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, cmd, next);
+ mfi_cmd->cm_flags &= ~MFI_CMD_TBOLT;
+ mfi_cmd->cm_extra_frames = 0;
+ tbolt_cmd->sync_cmd_idx = sc->mfi_max_fw_cmds;
+
+ TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, tbolt_cmd, next);
}
void
mfi_tbolt_complete_cmd(struct mfi_softc *sc)
{
struct mfi_mpi2_reply_header *desc, *reply_desc;
- struct mfi_command *cmd_mfi, *cmd_mfi_check; /* For MFA Cmds */
+ struct mfi_command *cmd_mfi; /* For MFA Cmds */
struct mfi_cmd_tbolt *cmd_tbolt;
uint16_t smid;
uint8_t reply_descript_type;
@@ -632,14 +666,17 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
uint32_t status, extStatus;
uint16_t num_completed;
union desc_value val;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
desc = (struct mfi_mpi2_reply_header *)
((uintptr_t)sc->reply_frame_pool_align
+ sc->last_reply_idx * sc->reply_size);
reply_desc = desc;
- if (!reply_desc)
+ if (reply_desc == NULL) {
device_printf(sc->mfi_dev, "reply desc is NULL!!\n");
+ return;
+ }
reply_descript_type = reply_desc->ReplyFlags
& MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
@@ -652,13 +689,18 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
/* Read Reply descriptor */
while ((val.u.low != 0xFFFFFFFF) && (val.u.high != 0xFFFFFFFF)) {
smid = reply_desc->SMID;
- if (!smid || smid > sc->mfi_max_fw_cmds + 1) {
- device_printf(sc->mfi_dev, "smid is %x. Cannot "
- "proceed. Returning \n", smid);
- return;
+ if (smid == 0 || smid > sc->mfi_max_fw_cmds) {
+			device_printf(sc->mfi_dev, "smid is %d, cannot "
+ "proceed - skipping\n", smid);
+ goto next;
}
-
cmd_tbolt = sc->mfi_cmd_pool_tbolt[smid - 1];
+ if (cmd_tbolt->sync_cmd_idx == sc->mfi_max_fw_cmds) {
+ device_printf(sc->mfi_dev, "cmd_tbolt %p "
+ "has invalid sync_cmd_idx=%d - skipping\n",
+ cmd_tbolt, cmd_tbolt->sync_cmd_idx);
+ goto next;
+ }
cmd_mfi = &sc->mfi_commands[cmd_tbolt->sync_cmd_idx];
scsi_io_req = cmd_tbolt->io_request;
@@ -666,33 +708,30 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
extStatus = cmd_mfi->cm_frame->dcmd.header.scsi_status;
map_tbolt_cmd_status(cmd_mfi, status, extStatus);
- if (cmd_mfi->cm_flags & MFI_CMD_SCSI &&
+		/* mfi_tbolt_return_cmd is handled by the complete / release path */
+ if ((cmd_mfi->cm_flags & MFI_CMD_SCSI) != 0 &&
(cmd_mfi->cm_flags & MFI_CMD_POLLED) != 0) {
/* polled LD/SYSPD IO command */
- mfi_tbolt_return_cmd(sc, cmd_tbolt);
/* XXX mark okay for now DJA */
cmd_mfi->cm_frame->header.cmd_status = MFI_STAT_OK;
- } else {
+ } else {
/* remove command from busy queue if not polled */
- TAILQ_FOREACH(cmd_mfi_check, &sc->mfi_busy, cm_link) {
- if (cmd_mfi_check == cmd_mfi) {
- mfi_remove_busy(cmd_mfi);
- break;
- }
- }
+ if ((cmd_mfi->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cmd_mfi);
/* complete the command */
mfi_complete(sc, cmd_mfi);
- mfi_tbolt_return_cmd(sc, cmd_tbolt);
}
+next:
sc->last_reply_idx++;
if (sc->last_reply_idx >= sc->mfi_max_fw_cmds) {
MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
sc->last_reply_idx = 0;
}
- /*set it back to all 0xfff.*/
+
+ /* Set it back to all 0xfff */
((union mfi_mpi2_reply_descriptor*)desc)->words =
~((uint64_t)0x00);
@@ -728,17 +767,23 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
*/
struct mfi_cmd_tbolt *
-mfi_tbolt_get_cmd(struct mfi_softc *sc)
+mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
{
struct mfi_cmd_tbolt *cmd = NULL;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh);
+ if ((cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh)) == NULL)
+ return (NULL);
TAILQ_REMOVE(&sc->mfi_cmd_tbolt_tqh, cmd, next);
memset((uint8_t *)cmd->sg_frame, 0, MEGASAS_MAX_SZ_CHAIN_FRAME);
memset((uint8_t *)cmd->io_request, 0,
MEGASAS_THUNDERBOLT_NEW_MSG_SIZE);
+
+ cmd->sync_cmd_idx = mfi_cmd->cm_index;
+ mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */
+ mfi_cmd->cm_flags |= MFI_CMD_TBOLT;
+
return cmd;
}
@@ -767,11 +812,9 @@ mfi_build_mpt_pass_thru(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
struct mfi_mpi2_request_raid_scsi_io *io_req;
struct mfi_cmd_tbolt *cmd;
- cmd = mfi_tbolt_get_cmd(sc);
+ cmd = mfi_tbolt_get_cmd(sc, mfi_cmd);
if (!cmd)
return EBUSY;
- mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */
- cmd->sync_cmd_idx = mfi_cmd->cm_index;
io_req = cmd->io_request;
mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL.IeeeChain;
@@ -980,16 +1023,21 @@ mfi_build_and_issue_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
struct mfi_cmd_tbolt *cmd;
union mfi_mpi2_request_descriptor *req_desc = NULL;
uint16_t index;
- cmd = mfi_tbolt_get_cmd(sc);
- if (!cmd)
- return NULL;
- mfi_cmd->cm_extra_frames = cmd->index;
- cmd->sync_cmd_idx = mfi_cmd->cm_index;
+ cmd = mfi_tbolt_get_cmd(sc, mfi_cmd);
+ if (cmd == NULL)
+ return (NULL);
index = cmd->index;
req_desc = mfi_tbolt_get_request_descriptor(sc, index-1);
- if (mfi_tbolt_build_io(sc, mfi_cmd, cmd))
- return NULL;
+ if (req_desc == NULL) {
+ mfi_tbolt_return_cmd(sc, cmd, mfi_cmd);
+ return (NULL);
+ }
+
+ if (mfi_tbolt_build_io(sc, mfi_cmd, cmd) != 0) {
+ mfi_tbolt_return_cmd(sc, cmd, mfi_cmd);
+ return (NULL);
+ }
req_desc->header.SMID = index;
return req_desc;
}
@@ -1008,7 +1056,7 @@ mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd)
index = cmd->cm_extra_frames;
req_desc = mfi_tbolt_get_request_descriptor(sc, index - 1);
- if (!req_desc)
+ if (req_desc == NULL)
return NULL;
bzero(req_desc, sizeof(*req_desc));
@@ -1024,7 +1072,7 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
struct mfi_frame_header *hdr;
uint8_t *cdb;
union mfi_mpi2_request_descriptor *req_desc = NULL;
- int tm = MFI_POLL_TIMEOUT_SECS * 1000;
+ int tm = mfi_polled_cmd_timeout * 1000;
hdr = &cm->cm_frame->header;
cdb = cm->cm_frame->pass.cdb;
@@ -1058,9 +1106,8 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
return 1;
}
} else if ((req_desc = mfi_tbolt_build_mpt_cmd(sc, cm)) == NULL) {
- device_printf(sc->mfi_dev, "Mapping from MFI to MPT "
- "Failed\n");
- return 1;
+ device_printf(sc->mfi_dev, "Mapping from MFI to MPT Failed\n");
+ return (1);
}
if (cm->cm_flags & MFI_CMD_SCSI) {
@@ -1078,23 +1125,30 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
if ((cm->cm_flags & MFI_CMD_POLLED) == 0)
return 0;
- if (cm->cm_flags & MFI_CMD_SCSI) {
- /* check reply queue */
- mfi_tbolt_complete_cmd(sc);
- }
-
- /* This is a polled command, so busy-wait for it to complete. */
+ /*
+ * This is a polled command, so busy-wait for it to complete.
+ *
+	 * The value of hdr->cmd_status is updated directly by the hardware,
+	 * so there is no guarantee that mfi_tbolt_complete_cmd is called
+	 * prior to this value changing.
+ */
while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
DELAY(1000);
tm -= 1;
if (tm <= 0)
break;
if (cm->cm_flags & MFI_CMD_SCSI) {
- /* check reply queue */
+ /*
+			 * Force a check of the reply queue;
+			 * this ensures that dump works correctly.
+ */
mfi_tbolt_complete_cmd(sc);
}
}
+ /* ensure the command cleanup has been processed before returning */
+ mfi_tbolt_complete_cmd(sc);
+
if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
device_printf(sc->mfi_dev, "Frame %p timed out "
"command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode);
@@ -1104,9 +1158,10 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
}
static void
-mfi_issue_pending_cmds_again (struct mfi_softc *sc)
+mfi_issue_pending_cmds_again(struct mfi_softc *sc)
{
struct mfi_command *cm, *tmp;
+ struct mfi_cmd_tbolt *cmd;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
TAILQ_FOREACH_REVERSE_SAFE(cm, &sc->mfi_busy, BUSYQ, cm_link, tmp) {
@@ -1119,50 +1174,51 @@ mfi_issue_pending_cmds_again (struct mfi_softc *sc)
* should be performed on the controller
*/
if (cm->retry_for_fw_reset == 3) {
- device_printf(sc->mfi_dev, "megaraid_sas: command %d "
- "was tried multiple times during adapter reset"
- "Shutting down the HBA\n", cm->cm_index);
+ device_printf(sc->mfi_dev, "megaraid_sas: command %p "
+ "index=%d was tried multiple times during adapter "
+ "reset - Shutting down the HBA\n", cm, cm->cm_index);
mfi_kill_hba(sc);
sc->hw_crit_error = 1;
return;
}
- if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) {
- struct mfi_cmd_tbolt *cmd;
- mfi_remove_busy(cm);
- cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames -
- 1 ];
- mfi_tbolt_return_cmd(sc, cmd);
- if ((cm->cm_flags & MFI_ON_MFIQ_MASK) == 0) {
- if (cm->cm_frame->dcmd.opcode !=
- MFI_DCMD_CTRL_EVENT_WAIT) {
- device_printf(sc->mfi_dev,
- "APJ ****requeue command %d \n",
- cm->cm_index);
- mfi_requeue_ready(cm);
- }
+ mfi_remove_busy(cm);
+ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) {
+ if (cm->cm_extra_frames != 0 && cm->cm_extra_frames <=
+ sc->mfi_max_fw_cmds) {
+ cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1];
+ mfi_tbolt_return_cmd(sc, cmd, cm);
+ } else {
+ device_printf(sc->mfi_dev,
+ "Invalid extra_frames: %d detected\n",
+ cm->cm_extra_frames);
}
- else
- mfi_release_command(cm);
}
+
+ if (cm->cm_frame->dcmd.opcode != MFI_DCMD_CTRL_EVENT_WAIT) {
+ device_printf(sc->mfi_dev,
+ "APJ ****requeue command %p index=%d\n",
+ cm, cm->cm_index);
+ mfi_requeue_ready(cm);
+ } else
+ mfi_release_command(cm);
}
mfi_startio(sc);
}
static void
-mfi_kill_hba (struct mfi_softc *sc)
+mfi_kill_hba(struct mfi_softc *sc)
{
if (sc->mfi_flags & MFI_FLAGS_TBOLT)
- MFI_WRITE4 (sc, 0x00,MFI_STOP_ADP);
+ MFI_WRITE4(sc, 0x00, MFI_STOP_ADP);
else
- MFI_WRITE4 (sc, MFI_IDB,MFI_STOP_ADP);
+ MFI_WRITE4(sc, MFI_IDB, MFI_STOP_ADP);
}
static void
mfi_process_fw_state_chg_isr(void *arg)
{
struct mfi_softc *sc= (struct mfi_softc *)arg;
- struct mfi_cmd_tbolt *cmd;
int error, status;
if (sc->adpreset == 1) {
@@ -1191,26 +1247,32 @@ mfi_process_fw_state_chg_isr(void *arg)
device_printf(sc->mfi_dev, "controller is not in "
"ready state\n");
mfi_kill_hba(sc);
- sc->hw_crit_error= 1;
- return ;
+ sc->hw_crit_error = 1;
+ return;
+ }
+ if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) {
+ device_printf(sc->mfi_dev, "Failed to initialise MFI "
+ "queue\n");
+ mfi_kill_hba(sc);
+ sc->hw_crit_error = 1;
+ return;
}
- if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0)
- return;
- mtx_lock(&sc->mfi_io_lock);
+ /* Init last reply index and max */
+ MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1);
+ MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
sc->mfi_enable_intr(sc);
sc->adpreset = 0;
- free(sc->mfi_aen_cm->cm_data, M_MFIBUF);
- mfi_remove_busy(sc->mfi_aen_cm);
- cmd = sc->mfi_cmd_pool_tbolt[sc->mfi_aen_cm->cm_extra_frames
- - 1];
- mfi_tbolt_return_cmd(sc, cmd);
- if (sc->mfi_aen_cm) {
+ if (sc->mfi_aen_cm != NULL) {
+ free(sc->mfi_aen_cm->cm_data, M_MFIBUF);
+ mfi_remove_busy(sc->mfi_aen_cm);
mfi_release_command(sc->mfi_aen_cm);
sc->mfi_aen_cm = NULL;
}
- if (sc->mfi_map_sync_cm) {
+
+ if (sc->mfi_map_sync_cm != NULL) {
+ mfi_remove_busy(sc->mfi_map_sync_cm);
mfi_release_command(sc->mfi_map_sync_cm);
sc->mfi_map_sync_cm = NULL;
}
@@ -1223,9 +1285,12 @@ mfi_process_fw_state_chg_isr(void *arg)
*/
if (!sc->hw_crit_error) {
/*
- * Initiate AEN (Asynchronous Event Notification)
+ * Initiate AEN (Asynchronous Event Notification) &
+ * Sync Map
*/
mfi_aen_setup(sc, sc->last_seq_num);
+ mfi_tbolt_sync_map_info(sc);
+
sc->issuepend_done = 1;
device_printf(sc->mfi_dev, "second stage of reset "
"complete, FW is ready now.\n");
@@ -1237,7 +1302,6 @@ mfi_process_fw_state_chg_isr(void *arg)
device_printf(sc->mfi_dev, "mfi_process_fw_state_chg_isr "
"called with unhandled value:%d\n", sc->adpreset);
}
- mtx_unlock(&sc->mfi_io_lock);
}
/*
@@ -1276,25 +1340,27 @@ void
mfi_tbolt_sync_map_info(struct mfi_softc *sc)
{
int error = 0, i;
- struct mfi_command *cmd;
- struct mfi_dcmd_frame *dcmd;
+ struct mfi_command *cmd = NULL;
+ struct mfi_dcmd_frame *dcmd = NULL;
uint32_t context = 0;
- union mfi_ld_ref *ld_sync;
+ union mfi_ld_ref *ld_sync = NULL;
size_t ld_size;
struct mfi_frame_header *hdr;
struct mfi_command *cm = NULL;
struct mfi_ld_list *list = NULL;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if (sc->mfi_map_sync_cm != NULL || sc->cm_map_abort)
return;
- mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST,
(void **)&list, sizeof(*list));
if (error)
goto out;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAIN;
+
if (mfi_wait_command(sc, cm) != 0) {
device_printf(sc->mfi_dev, "Failed to get device listing\n");
goto out;
@@ -1308,18 +1374,15 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc)
}
ld_size = sizeof(*ld_sync) * list->ld_count;
- mtx_unlock(&sc->mfi_io_lock);
ld_sync = (union mfi_ld_ref *) malloc(ld_size, M_MFIBUF,
- M_WAITOK | M_ZERO);
+ M_NOWAIT | M_ZERO);
if (ld_sync == NULL) {
device_printf(sc->mfi_dev, "Failed to allocate sync\n");
goto out;
}
- for (i = 0; i < list->ld_count; i++) {
+ for (i = 0; i < list->ld_count; i++)
ld_sync[i].ref = list->ld_list[i].ld.ref;
- }
- mtx_lock(&sc->mfi_io_lock);
if ((cmd = mfi_dequeue_free(sc)) == NULL) {
device_printf(sc->mfi_dev, "Failed to get command\n");
free(ld_sync, M_MFIBUF);
@@ -1355,7 +1418,7 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc)
device_printf(sc->mfi_dev, "failed to send map sync\n");
free(ld_sync, M_MFIBUF);
sc->mfi_map_sync_cm = NULL;
- mfi_requeue_ready(cmd);
+ mfi_release_command(cmd);
goto out;
}
@@ -1364,7 +1427,6 @@ out:
free(list, M_MFIBUF);
if (cm)
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
}
static void
@@ -1389,14 +1451,13 @@ mfi_sync_map_complete(struct mfi_command *cm)
}
free(cm->cm_data, M_MFIBUF);
- sc->mfi_map_sync_cm = NULL;
wakeup(&sc->mfi_map_sync_cm);
+ sc->mfi_map_sync_cm = NULL;
mfi_release_command(cm);
/* set it up again so the driver can catch more events */
- if (!aborted) {
+ if (!aborted)
mfi_queue_map_sync(sc);
- }
}
static void
@@ -1412,5 +1473,7 @@ mfi_handle_map_sync(void *context, int pending)
struct mfi_softc *sc;
sc = context;
+ mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_sync_map_info(sc);
+ mtx_unlock(&sc->mfi_io_lock);
}
diff --git a/sys/dev/mfi/mfireg.h b/sys/dev/mfi/mfireg.h
index dab9cf7..52ddafe 100644
--- a/sys/dev/mfi/mfireg.h
+++ b/sys/dev/mfi/mfireg.h
@@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$");
* ThunderBolt specific Register
*/
+#define MFI_RFPI 0x48 /* reply_free_post_host_index */
#define MFI_RPI 0x6c /* reply_post_host_index */
#define MFI_ILQP 0xc0 /* inbound_low_queue_port */
#define MFI_IHQP 0xc4 /* inbound_high_queue_port */
@@ -259,6 +260,13 @@ typedef enum {
#define MFI_FRAME_DIR_READ 0x0010
#define MFI_FRAME_DIR_BOTH 0x0018
#define MFI_FRAME_IEEE_SGL 0x0020
+#define MFI_FRAME_FMT "\20" \
+ "\1NOPOST" \
+ "\2SGL64" \
+ "\3SENSE64" \
+ "\4WRITE" \
+ "\5READ" \
+ "\6IEEESGL"
/* ThunderBolt Specific */
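
MFI_FRAME_FMT (and MFI_CMD_FLAGS_FMT added in mfivar.h below) are printf(9) "%b" format strings: the first byte gives the output base (\20 is octal 020, i.e. hexadecimal) and each following record is a 1-based bit number followed by that bit's name. A hedged userland re-implementation of the decoding, for illustration only (the real one lives in the kernel's printf):

#include <stdio.h>

static void
print_bits(unsigned int v, const char *fmt)
{
	int any = 0, base;
	char c;

	base = *fmt++;
	printf(base == 8 ? "%o" : base == 16 ? "%x" : "%u", v);
	while ((c = *fmt++) != '\0') {
		if (v & (1U << (c - 1))) {
			putchar(any ? ',' : '<');
			any = 1;
			for (; *fmt > ' '; fmt++)
				putchar(*fmt);
		} else {
			for (; *fmt > ' '; fmt++)
				;	/* skip the name of a clear bit */
		}
	}
	if (any)
		putchar('>');
	putchar('\n');
}

int
main(void)
{
	/* MFI_FRAME_FMT from the hunk above. */
	const char *fmt = "\20\1NOPOST\2SGL64\3SENSE64\4WRITE\5READ\6IEEESGL";

	print_bits(0x18, fmt);	/* prints: 18<WRITE,READ> */
	return (0);
}
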
@@ -456,8 +464,8 @@ typedef enum {
#define MFI_FRAME_SIZE 64
#define MFI_MBOX_SIZE 12
-/* Firmware flashing can take 40s */
-#define MFI_POLL_TIMEOUT_SECS 50
+/* Firmware flashing can take 50+ seconds */
+#define MFI_POLL_TIMEOUT_SECS 60
/* Allow for speedier math calculations */
#define MFI_SECTOR_LEN 512
diff --git a/sys/dev/mfi/mfivar.h b/sys/dev/mfi/mfivar.h
index bb2a324..664ede9 100644
--- a/sys/dev/mfi/mfivar.h
+++ b/sys/dev/mfi/mfivar.h
@@ -102,12 +102,25 @@ struct mfi_command {
#define MFI_CMD_DATAOUT (1<<2)
#define MFI_CMD_COMPLETED (1<<3)
#define MFI_CMD_POLLED (1<<4)
-#define MFI_ON_MFIQ_FREE (1<<5)
-#define MFI_ON_MFIQ_READY (1<<6)
-#define MFI_ON_MFIQ_BUSY (1<<7)
-#define MFI_ON_MFIQ_MASK ((1<<5)|(1<<6)|(1<<7))
-#define MFI_CMD_SCSI (1<<8)
-#define MFI_CMD_CCB (1<<9)
+#define MFI_CMD_SCSI (1<<5)
+#define MFI_CMD_CCB (1<<6)
+#define MFI_CMD_TBOLT (1<<7)
+#define MFI_ON_MFIQ_FREE (1<<8)
+#define MFI_ON_MFIQ_READY (1<<9)
+#define MFI_ON_MFIQ_BUSY (1<<10)
+#define MFI_ON_MFIQ_MASK (MFI_ON_MFIQ_FREE | MFI_ON_MFIQ_READY| \
+ MFI_ON_MFIQ_BUSY)
+#define MFI_CMD_FLAGS_FMT "\20" \
+ "\1MAPPED" \
+ "\2DATAIN" \
+ "\3DATAOUT" \
+ "\4COMPLETED" \
+ "\5POLLED" \
+ "\6SCSI" \
+ "\7TBOLT" \
+ "\10Q_FREE" \
+ "\11Q_READY" \
+ "\12Q_BUSY"
uint8_t retry_for_fw_reset;
void (* cm_complete)(struct mfi_command *cm);
void *cm_private;
@@ -268,10 +281,6 @@ struct mfi_softc {
*/
struct mfi_command *mfi_commands;
/*
- * How many commands were actually allocated
- */
- int mfi_total_cmds;
- /*
* How many commands the firmware can handle. Also how big the reply
* queue is, minus 1.
*/
@@ -470,9 +479,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_enqueue_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
+ panic("command %p is on another queue, " \
"flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
} \
TAILQ_INSERT_TAIL(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags |= MFI_ON_ ## index; \
@@ -482,9 +490,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_requeue_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
+ panic("command %p is on another queue, " \
"flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
} \
TAILQ_INSERT_HEAD(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags |= MFI_ON_ ## index; \
@@ -497,10 +504,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
\
if ((cm = TAILQ_FIRST(&sc->mfi_ ## name)) != NULL) { \
if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \
- printf("command %p not in queue, " \
+ panic("command %p not in queue, " \
"flags = %#x, bit = %#x\n", cm, \
cm->cm_flags, MFI_ON_ ## index); \
- panic("command not in queue"); \
} \
TAILQ_REMOVE(&sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags &= ~MFI_ON_ ## index; \
@@ -512,10 +518,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_remove_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \
- printf("command %p not in queue, flags = %#x, " \
+ panic("command %p not in queue, flags = %#x, " \
"bit = %#x\n", cm, cm->cm_flags, \
MFI_ON_ ## index); \
- panic("command not in queue"); \
} \
TAILQ_REMOVE(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags &= ~MFI_ON_ ## index; \
@@ -608,7 +613,8 @@ SYSCTL_DECL(_hw_mfi);
#ifdef MFI_DEBUG
extern void mfi_print_cmd(struct mfi_command *cm);
extern void mfi_dump_cmds(struct mfi_softc *sc);
-extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char *, int );
+extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *,
+ const char *, int);
#define MFI_PRINT_CMD(cm) mfi_print_cmd(cm)
#define MFI_DUMP_CMDS(sc) mfi_dump_cmds(sc)
#define MFI_VALIDATE_CMD(sc, cm) mfi_validate_sg(sc, cm, __FUNCTION__, __LINE__)
@@ -618,6 +624,8 @@ extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char
#define MFI_VALIDATE_CMD(sc, cm)
#endif
-extern void mfi_release_command(struct mfi_command *cm);
+extern void mfi_release_command(struct mfi_command *);
+extern void mfi_tbolt_return_cmd(struct mfi_softc *,
+ struct mfi_cmd_tbolt *, struct mfi_command *);
#endif /* _MFIVAR_H */
diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c
index d0ca808..664575c 100644
--- a/sys/dev/msk/if_msk.c
+++ b/sys/dev/msk/if_msk.c
@@ -1695,6 +1695,12 @@ msk_attach(device_t dev)
ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
}
ifp->if_capenable = ifp->if_capabilities;
+ /*
+	 * Disable RX checksum offloading on controllers that don't use
+	 * the new descriptor format, but leave the capability so that
+	 * it can still be enabled.
+ */
+ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0)
+ ifp->if_capenable &= ~IFCAP_RXCSUM;
/*
* Tell the upper layer(s) we support long frames.
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 66da0d0..6d110ab 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1269,6 +1269,15 @@ brelse(struct buf *bp)
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+ if (BUF_LOCKRECURSED(bp)) {
+ /*
+		 * Do not process; in particular, do not handle
+		 * B_INVAL/B_RELBUF and do not release to the free list.
+ */
+ BUF_UNLOCK(bp);
+ return;
+ }
+
if (bp->b_flags & B_MANAGED) {
bqrelse(bp);
return;
@@ -1445,12 +1454,6 @@ brelse(struct buf *bp)
brelvp(bp);
}
- if (BUF_LOCKRECURSED(bp)) {
- /* do not release to free list */
- BUF_UNLOCK(bp);
- return;
- }
-
/* enqueue */
mtx_lock(&bqlock);
/* Handle delayed bremfree() processing. */
@@ -2682,6 +2685,9 @@ loop:
/* We timed out or were interrupted. */
else if (error)
return (NULL);
+ /* If recursed, assume caller knows the rules. */
+ else if (BUF_LOCKRECURSED(bp))
+ goto end;
/*
* The buffer is locked. B_CACHE is cleared if the buffer is
@@ -2865,6 +2871,7 @@ loop:
}
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
BUF_ASSERT_HELD(bp);
+end:
KASSERT(bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
return (bp);
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index b54dc04..0696edd 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -385,6 +385,7 @@ extern int vttoif_tab[];
#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
#define FORCECLOSE 0x0002 /* vflush: force file closure */
#define WRITECLOSE 0x0004 /* vflush: only close writable files */
+#define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
#define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */
#define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index abe4073..789a7cf 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1790,6 +1790,17 @@ fail:
return (0);
}
+static inline struct buf *
+getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags)
+{
+ struct fs *fs;
+
+ fs = ip->i_fs;
+ return (getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs,
+ cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0,
+ gbflags));
+}
+
/*
* Determine whether an inode can be allocated.
*
@@ -1814,9 +1825,11 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
u_int8_t *inosused, *loc;
struct ufs2_dinode *dp2;
int error, start, len, i;
+ u_int32_t old_initediblk;
fs = ip->i_fs;
ump = ip->i_ump;
+check_nifree:
if (fs->fs_cs(fs, cg).cs_nifree == 0)
return (0);
UFS_UNLOCK(ump);
@@ -1828,13 +1841,13 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
return (0);
}
cgp = (struct cg *)bp->b_data;
+restart:
if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
brelse(bp);
UFS_LOCK(ump);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
- cgp->cg_old_time = cgp->cg_time = time_second;
inosused = cg_inosused(cgp);
if (ipref) {
ipref %= fs->fs_ipg;
@@ -1856,7 +1869,6 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
}
}
ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
- cgp->cg_irotor = ipref;
gotit:
/*
* Check to see if we need to initialize more inodes.
@@ -1864,9 +1876,37 @@ gotit:
if (fs->fs_magic == FS_UFS2_MAGIC &&
ipref + INOPB(fs) > cgp->cg_initediblk &&
cgp->cg_initediblk < cgp->cg_niblk) {
- ibp = getblk(ip->i_devvp, fsbtodb(fs,
- ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
- (int)fs->fs_bsize, 0, 0, 0);
+ old_initediblk = cgp->cg_initediblk;
+
+ /*
+ * Free the cylinder group lock before writing the
+	 * initialized inode block.  Entering babarrierwrite()
+	 * with the cylinder group lock held causes a lock-order
+	 * violation between that lock and snaplk.
+ *
+ * Another thread can decide to initialize the same
+ * inode block, but whichever thread first gets the
+ * cylinder group lock after writing the newly
+ * allocated inode block will update it and the other
+ * will realize that it has lost and leave the
+ * cylinder group unchanged.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT);
+ brelse(bp);
+ if (ibp == NULL) {
+ /*
+ * The inode block buffer is already owned by
+ * another thread, which must initialize it.
+			 * Wait on the buffer to allow the other thread
+			 * to finish its updates, with the cg buffer
+			 * lock dropped, then retry.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, 0);
+ brelse(ibp);
+ UFS_LOCK(ump);
+ goto check_nifree;
+ }
bzero(ibp->b_data, (int)fs->fs_bsize);
dp2 = (struct ufs2_dinode *)(ibp->b_data);
for (i = 0; i < INOPB(fs); i++) {
@@ -1883,8 +1923,29 @@ gotit:
* loading of newly created filesystems.
*/
babarrierwrite(ibp);
- cgp->cg_initediblk += INOPB(fs);
+
+ /*
+ * After the inode block is written, try to update the
+ * cg initediblk pointer. If another thread beat us
+ * to it, then leave it unchanged as the other thread
+ * has already set it correctly.
+ */
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ UFS_LOCK(ump);
+ ACTIVECLEAR(fs, cg);
+ UFS_UNLOCK(ump);
+ if (error != 0) {
+ brelse(bp);
+ return (error);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (cgp->cg_initediblk == old_initediblk)
+ cgp->cg_initediblk += INOPB(fs);
+ goto restart;
}
+ cgp->cg_old_time = cgp->cg_time = time_second;
+ cgp->cg_irotor = ipref;
UFS_LOCK(ump);
ACTIVECLEAR(fs, cg);
setbit(inosused, ipref);
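
The ffs_nodealloccg() change above is an optimistic drop-and-revalidate: release the cg buffer lock, do the slow barrier write, then re-read the cg and advance cg_initediblk only if no other thread already did. A simplified userland model of the race handling (pthreads, illustrative names):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cg_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned initediblk;

static void
init_more_inodes(unsigned inopb)
{
	unsigned old;

	pthread_mutex_lock(&cg_lock);
	old = initediblk;
	pthread_mutex_unlock(&cg_lock);

	/* Slow work (the barrier write) happens without the lock held. */

	pthread_mutex_lock(&cg_lock);
	if (initediblk == old)
		initediblk += inopb;	/* we won the race */
	else
		printf("lost the race; initediblk already %u\n", initediblk);
	pthread_mutex_unlock(&cg_lock);
}

int
main(void)
{
	init_more_inodes(32);
	printf("initediblk = %u\n", initediblk);
	return (0);
}
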
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 16fe134..e39fd46 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1908,7 +1908,12 @@ softdep_flushfiles(oldmnt, flags, td)
int flags;
struct thread *td;
{
- int error, depcount, loopcnt, retry_flush_count, retry;
+#ifdef QUOTA
+ struct ufsmount *ump;
+ int i;
+#endif
+ int error, early, depcount, loopcnt, retry_flush_count, retry;
+ int morework;
loopcnt = 10;
retry_flush_count = 3;
@@ -1926,7 +1931,9 @@ retry_flush:
* Do another flush in case any vnodes were brought in
* as part of the cleanup operations.
*/
- if ((error = ffs_flushfiles(oldmnt, flags, td)) != 0)
+ early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag &
+ MNTK_UNMOUNT) == 0 ? 0 : EARLYFLUSH;
+ if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0)
break;
if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 ||
depcount == 0)
@@ -1950,7 +1957,17 @@ retry_flush:
MNT_ILOCK(oldmnt);
KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0,
("softdep_flushfiles: !MNTK_NOINSMNTQ"));
- if (oldmnt->mnt_nvnodelistsize > 0) {
+ morework = oldmnt->mnt_nvnodelistsize > 0;
+#ifdef QUOTA
+ ump = VFSTOUFS(oldmnt);
+ UFS_LOCK(ump);
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (ump->um_quotas[i] != NULLVP)
+ morework = 1;
+ }
+ UFS_UNLOCK(ump);
+#endif
+ if (morework) {
if (--retry_flush_count > 0) {
retry = 1;
loopcnt = 3;
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 0204613..b3292d0 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1351,9 +1351,10 @@ ffs_flushfiles(mp, flags, td)
struct thread *td;
{
struct ufsmount *ump;
- int error;
+ int qerror, error;
ump = VFSTOUFS(mp);
+ qerror = 0;
#ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) {
int i;
@@ -1361,11 +1362,19 @@ ffs_flushfiles(mp, flags, td)
if (error)
return (error);
for (i = 0; i < MAXQUOTAS; i++) {
- quotaoff(td, mp, i);
+ error = quotaoff(td, mp, i);
+ if (error != 0) {
+ if ((flags & EARLYFLUSH) == 0)
+ return (error);
+ else
+ qerror = error;
+ }
}
+
/*
- * Here we fall through to vflush again to ensure
- * that we have gotten rid of all the system vnodes.
+ * Here we fall through to vflush again to ensure that
+	 * we have gotten rid of all the system vnodes, unless
+	 * the quota files could not be closed.
*/
}
#endif
@@ -1380,11 +1389,21 @@ ffs_flushfiles(mp, flags, td)
* that we have gotten rid of all the system vnodes.
*/
}
- /*
- * Flush all the files.
+
+ /*
+	 * Do not close system files if quotas were not closed, so
+	 * that the remaining dquots can still be synced.  The
+	 * freeblks softupdate workitems might hold a reference on
+	 * a dquot, preventing quotaoff() from completing.  The next
+	 * round of softdep_flushworklist() iteration should process
+	 * the blockers, allowing the next run of quotaoff() to
+	 * finally flush the held dquots.
+ *
+ * Otherwise, flush all the files.
*/
- if ((error = vflush(mp, 0, flags, td)) != 0)
+ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
return (error);
+
/*
* Flush filesystem metadata.
*/
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 87ac9a1..a949898 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -80,7 +80,7 @@ static int dqopen(struct vnode *, struct ufsmount *, int);
static int dqget(struct vnode *,
u_long, struct ufsmount *, int, struct dquot **);
static int dqsync(struct vnode *, struct dquot *);
-static void dqflush(struct vnode *);
+static int dqflush(struct vnode *);
static int quotaoff1(struct thread *td, struct mount *mp, int type);
static int quotaoff_inchange(struct thread *td, struct mount *mp, int type);
@@ -674,8 +674,12 @@ again:
vrele(vp);
}
- dqflush(qvp);
- /* Clear um_quotas before closing the quota vnode to prevent
+ error = dqflush(qvp);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Clear um_quotas before closing the quota vnode to prevent
* access to the closed vnode from dqget/dqsync
*/
UFS_LOCK(ump);
@@ -1594,17 +1598,19 @@ out:
/*
* Flush all entries from the cache for a particular vnode.
*/
-static void
+static int
dqflush(struct vnode *vp)
{
struct dquot *dq, *nextdq;
struct dqhash *dqh;
+ int error;
/*
* Move all dquot's that used to refer to this quota
* file off their hash chains (they will eventually
* fall off the head of the free list and be re-used).
*/
+ error = 0;
DQH_LOCK();
for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) {
for (dq = LIST_FIRST(dqh); dq; dq = nextdq) {
@@ -1612,12 +1618,15 @@ dqflush(struct vnode *vp)
if (dq->dq_ump->um_quotas[dq->dq_type] != vp)
continue;
if (dq->dq_cnt)
- panic("dqflush: stray dquot");
- LIST_REMOVE(dq, dq_hash);
- dq->dq_ump = (struct ufsmount *)0;
+ error = EBUSY;
+ else {
+ LIST_REMOVE(dq, dq_hash);
+ dq->dq_ump = NULL;
+ }
}
}
DQH_UNLOCK();
+ return (error);
}
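
The dqflush() rework turns a panic into a soft failure: dquots that still have references stay on their hash chains and EBUSY is returned, so quotaoff() can back off and retry later. A compact sketch of the scan-and-report pattern (illustrative types, not the UFS structures):

#include <errno.h>
#include <stdio.h>

struct dq {
	int		dq_cnt;		/* reference count */
	struct dq	*next;
	int		hashed;		/* still on its hash chain? */
};

static int
flush(struct dq *head)
{
	struct dq *dq;
	int error = 0;

	for (dq = head; dq != NULL; dq = dq->next) {
		if (dq->dq_cnt != 0)
			error = EBUSY;	/* leave it; caller may retry */
		else
			dq->hashed = 0;	/* stands in for LIST_REMOVE() */
	}
	return (error);
}

int
main(void)
{
	struct dq b = { .dq_cnt = 1, .next = NULL, .hashed = 1 };
	struct dq a = { .dq_cnt = 0, .next = &b, .hashed = 1 };

	printf("flush: %d (EBUSY=%d)\n", flush(&a), EBUSY);
	return (0);
}
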
/*