author	attilio <attilio@FreeBSD.org>	2013-02-27 18:17:34 +0000
committer	attilio <attilio@FreeBSD.org>	2013-02-27 18:17:34 +0000
commit	52c57fbbdb554a7ce0cdbb6bf27051ef70834bdf (patch)
tree	d0908474209a17865e044675940a2f62f9ff2493 /sys
parent	c74a3afc6a5d7d1ced989c36d4ba0a7d2bbc43b9 (diff)
MFC
Diffstat (limited to 'sys')
-rw-r--r--  sys/arm/ti/ti_gpio.c                                               |  14
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c          | 381
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c         |  33
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h |  32
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h     |   7
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c              |   1
-rw-r--r--  sys/dev/ath/ath_hal/ah.c                                           |   4
-rw-r--r--  sys/dev/ath/ath_hal/ah.h                                           |   3
-rw-r--r--  sys/dev/ath/ath_rate/sample/sample.c                               | 132
-rw-r--r--  sys/dev/ath/if_ath.c                                               |  22
-rw-r--r--  sys/dev/ath/if_ath_tx_ht.c                                         |  23
-rw-r--r--  sys/dev/ath/if_athvar.h                                            |   4
-rw-r--r--  sys/dev/mfi/mfi.c                                                  | 316
-rw-r--r--  sys/dev/mfi/mfi_cam.c                                              |   8
-rw-r--r--  sys/dev/mfi/mfi_debug.c                                            |  22
-rw-r--r--  sys/dev/mfi/mfi_tbolt.c                                            | 395
-rw-r--r--  sys/dev/mfi/mfireg.h                                               |  12
-rw-r--r--  sys/dev/mfi/mfivar.h                                               |  48
-rw-r--r--  sys/dev/msk/if_msk.c                                               |   6
-rw-r--r--  sys/kern/vfs_bio.c                                                 |  19
-rw-r--r--  sys/sys/vnode.h                                                    |   1
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c                                            |  73
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c                                          |  23
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c                                           |  33
-rw-r--r--  sys/ufs/ufs/ufs_quota.c                                            |  23
25 files changed, 1064 insertions(+), 571 deletions(-)
diff --git a/sys/arm/ti/ti_gpio.c b/sys/arm/ti/ti_gpio.c
index 58de516..4edb10e 100644
--- a/sys/arm/ti/ti_gpio.c
+++ b/sys/arm/ti/ti_gpio.c
@@ -653,6 +653,9 @@ ti_gpio_attach(device_t dev)
struct ti_gpio_softc *sc = device_get_softc(dev);
unsigned int i;
int err = 0;
+ int pin;
+ uint32_t flags;
+ uint32_t reg_oe;
sc->sc_dev = dev;
@@ -720,6 +723,17 @@ ti_gpio_attach(device_t dev)
/* Disable interrupts for all pins */
ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE1, 0xffffffff);
ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE2, 0xffffffff);
+
+ /* Init OE register based on pad configuration */
+ reg_oe = 0xffffffff;
+ for (pin = 0; pin < 32; pin++) {
+ ti_scm_padconf_get_gpioflags(
+ PINS_PER_BANK*i + pin, &flags);
+ if (flags & GPIO_PIN_OUTPUT)
+ reg_oe &= ~(1U << pin);
+ }
+
+ ti_gpio_write_4(sc, i, TI_GPIO_OE, reg_oe);
}
}
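For illustration, a minimal userland sketch of the OE computation in the hunk above. GPIO_PIN_OUTPUT is assumed to be 0x2 (as in sys/sys/gpio.h), a cleared OE bit is assumed to enable the output driver per the TI GPIO register layout, and the pad flags are hypothetical.

#include <stdint.h>
#include <stdio.h>

#define PINS_PER_BANK	32	/* assumed bank width */
#define GPIO_PIN_OUTPUT	0x2	/* assumed flag value */

static uint32_t
compute_oe(const uint32_t *padflags)
{
	uint32_t reg_oe = 0xffffffff;	/* all pins default to input */
	int pin;

	for (pin = 0; pin < PINS_PER_BANK; pin++) {
		/* a cleared OE bit enables the output driver */
		if (padflags[pin] & GPIO_PIN_OUTPUT)
			reg_oe &= ~(1U << pin);
	}
	return (reg_oe);
}

int
main(void)
{
	uint32_t padflags[PINS_PER_BANK] = { 0 };

	padflags[3] = padflags[7] = GPIO_PIN_OUTPUT;
	/* pins 3 and 7 as outputs: OE = 0xffffff77 */
	printf("OE = %#x\n", (unsigned)compute_oe(padflags));
	return (0);
}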
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
index e81dc02..d6651f9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
@@ -48,6 +48,14 @@ uint64_t metaslab_aliquot = 512ULL << 10;
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
+ * The in-core space map representation is more compact than its on-disk form.
+ * The zfs_condense_pct determines how much more compact the in-core
+ * space_map representation must be before we compact it on-disk.
+ * Values should be greater than or equal to 100.
+ */
+int zfs_condense_pct = 200;
+
+/*
* This value defines the number of allowed allocation failures per vdev.
* If a device reaches this threshold in a given txg then we consider skipping
* allocations on that device.
@@ -215,9 +223,9 @@ metaslab_compare(const void *x1, const void *x2)
/*
* If the weights are identical, use the offset to force uniqueness.
*/
- if (m1->ms_map.sm_start < m2->ms_map.sm_start)
+ if (m1->ms_map->sm_start < m2->ms_map->sm_start)
return (-1);
- if (m1->ms_map.sm_start > m2->ms_map.sm_start)
+ if (m1->ms_map->sm_start > m2->ms_map->sm_start)
return (1);
ASSERT3P(m1, ==, m2);
@@ -732,14 +740,15 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
* addition of new space; and for debugging, it ensures that we'd
* data fault on any attempt to use this metaslab before it's ready.
*/
- space_map_create(&msp->ms_map, start, size,
+ msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
+ space_map_create(msp->ms_map, start, size,
vd->vdev_ashift, &msp->ms_lock);
metaslab_group_add(mg, msp);
if (metaslab_debug && smo->smo_object != 0) {
mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops,
+ VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops,
SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
mutex_exit(&msp->ms_lock);
}
@@ -767,22 +776,27 @@ metaslab_fini(metaslab_t *msp)
metaslab_group_t *mg = msp->ms_group;
vdev_space_update(mg->mg_vd,
- -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size);
+ -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size);
metaslab_group_remove(mg, msp);
mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- space_map_destroy(&msp->ms_map);
+ space_map_unload(msp->ms_map);
+ space_map_destroy(msp->ms_map);
+ kmem_free(msp->ms_map, sizeof (*msp->ms_map));
for (int t = 0; t < TXG_SIZE; t++) {
- space_map_destroy(&msp->ms_allocmap[t]);
- space_map_destroy(&msp->ms_freemap[t]);
+ space_map_destroy(msp->ms_allocmap[t]);
+ space_map_destroy(msp->ms_freemap[t]);
+ kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t]));
+ kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t]));
}
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_destroy(&msp->ms_defermap[t]);
+ for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+ space_map_destroy(msp->ms_defermap[t]);
+ kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t]));
+ }
ASSERT0(msp->ms_deferspace);
@@ -801,7 +815,7 @@ static uint64_t
metaslab_weight(metaslab_t *msp)
{
metaslab_group_t *mg = msp->ms_group;
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
vdev_t *vd = mg->mg_vd;
uint64_t weight, space;
@@ -809,6 +823,16 @@ metaslab_weight(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
/*
+ * This vdev is in the process of being removed so there is nothing
+ * for us to do here.
+ */
+ if (vd->vdev_removing) {
+ ASSERT0(smo->smo_alloc);
+ ASSERT0(vd->vdev_ms_shift);
+ return (0);
+ }
+
+ /*
* The baseline weight is the metaslab's free space.
*/
space = sm->sm_size - smo->smo_alloc;
@@ -861,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg)
* Prefetch the next potential metaslabs
*/
for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
/* If we have reached our prefetch limit then we're done */
@@ -882,7 +906,7 @@ static int
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
{
metaslab_group_t *mg = msp->ms_group;
- space_map_t *sm = &msp->ms_map;
+ space_map_t *sm = msp->ms_map;
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -899,7 +923,7 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
return (error);
}
for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_walk(&msp->ms_defermap[t],
+ space_map_walk(msp->ms_defermap[t],
space_map_claim, sm);
}
@@ -930,12 +954,158 @@ metaslab_passivate(metaslab_t *msp, uint64_t size)
* this metaslab again. In that case, it had better be empty,
* or we would be leaving space on the table.
*/
- ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
+ ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0);
metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
}
/*
+ * Determine if the in-core space map representation can be condensed on-disk.
+ * We would like to use the following criteria to make our decision:
+ *
+ * 1. The size of the space map object should not dramatically increase as a
+ * result of writing out our in-core free map.
+ *
+ * 2. The minimal on-disk space map representation is zfs_condense_pct/100
+ * times the size of the in-core representation (i.e. zfs_condense_pct = 110
+ * and in-core = 1MB, minimal = 1.1MB).
+ *
+ * Checking the first condition is tricky since we don't want to walk
+ * the entire AVL tree calculating the estimated on-disk size. Instead we
+ * use the size-ordered AVL tree in the space map and calculate the
+ * size required for the largest segment in our in-core free map. If the
+ * size required to represent that segment on disk is larger than the space
+ * map object then we avoid condensing this map.
+ *
+ * To determine the second criterion we use a best-case estimate and assume
+ * each segment can be represented on-disk as a single 64-bit entry. We refer
+ * to this best-case estimate as the space map's minimal form.
+ */
+static boolean_t
+metaslab_should_condense(metaslab_t *msp)
+{
+ space_map_t *sm = msp->ms_map;
+ space_map_obj_t *smo = &msp->ms_smo_syncing;
+ space_seg_t *ss;
+ uint64_t size, entries, segsz;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+ ASSERT(sm->sm_loaded);
+
+ /*
+ * Use the sm_pp_root AVL tree, which is ordered by size, to obtain
+ * the largest segment in the in-core free map. If the tree is
+ * empty then we should condense the map.
+ */
+ ss = avl_last(sm->sm_pp_root);
+ if (ss == NULL)
+ return (B_TRUE);
+
+ /*
+ * Calculate the number of 64-bit entries this segment would
+ * require when written to disk. If this single segment would be
+ * larger on-disk than the entire current on-disk structure, then
+ * clearly condensing will increase the on-disk structure size.
+ */
+ size = (ss->ss_end - ss->ss_start) >> sm->sm_shift;
+ entries = size / (MIN(size, SM_RUN_MAX));
+ segsz = entries * sizeof (uint64_t);
+
+ return (segsz <= smo->smo_objsize &&
+ smo->smo_objsize >= (zfs_condense_pct *
+ sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100);
+}
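A worked example of the two checks above, as a standalone sketch. SM_RUN_MAX is assumed to be 32767 (a 15-bit run field); all other numbers are hypothetical.

#include <stdint.h>
#include <stdio.h>

#define SM_RUN_MAX	32767ULL	/* assumed: 15-bit run field */

int
main(void)
{
	uint64_t sm_shift = 9;			/* 512-byte units */
	uint64_t largest_seg = 128ULL << 10;	/* largest free segment */
	uint64_t smo_objsize = 24ULL << 10;	/* current on-disk size */
	uint64_t nodes = 1000;			/* in-core segments */
	uint64_t zfs_condense_pct = 200;
	uint64_t size, entries, segsz, minimal;

	/* entries needed to write the largest segment to disk */
	size = largest_seg >> sm_shift;
	entries = size / (size < SM_RUN_MAX ? size : SM_RUN_MAX);
	segsz = entries * sizeof (uint64_t);	/* 8 bytes here */

	/* minimal form: one 64-bit entry per in-core segment */
	minimal = nodes * sizeof (uint64_t);	/* 8000 bytes here */

	/* both criteria hold, so this map would be condensed */
	printf("condense: %d\n", segsz <= smo_objsize &&
	    smo_objsize >= (zfs_condense_pct * minimal) / 100);
	return (0);
}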
+
+/*
+ * Condense the on-disk space map representation to its minimized form.
+ * The minimized form consists of a small number of allocations followed by
+ * the in-core free map.
+ */
+static void
+metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK];
+ space_map_t condense_map;
+ space_map_t *sm = msp->ms_map;
+ objset_t *mos = spa_meta_objset(spa);
+ space_map_obj_t *smo = &msp->ms_smo_syncing;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
+ ASSERT(sm->sm_loaded);
+
+ spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, "
+ "smo size %llu, segments %lu", txg,
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
+ smo->smo_objsize, avl_numnodes(&sm->sm_root));
+
+ /*
+ * Create a map that is 100% allocated. We remove segments
+ * that have been freed in this txg, any deferred frees that exist,
+ * and any allocations in the future. Removing segments should be
+ * a relatively inexpensive operation since we expect these maps to
+ * contain a small number of nodes.
+ */
+ space_map_create(&condense_map, sm->sm_start, sm->sm_size,
+ sm->sm_shift, sm->sm_lock);
+ space_map_add(&condense_map, condense_map.sm_start,
+ condense_map.sm_size);
+
+ /*
+ * Remove what's been freed in this txg from the condense_map.
+ * Since we're in sync_pass 1, we know that all the frees from
+ * this txg are in the freemap.
+ */
+ space_map_walk(freemap, space_map_remove, &condense_map);
+
+ for (int t = 0; t < TXG_DEFER_SIZE; t++)
+ space_map_walk(msp->ms_defermap[t],
+ space_map_remove, &condense_map);
+
+ for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
+ space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK],
+ space_map_remove, &condense_map);
+
+ /*
+ * We're about to drop the metaslab's lock thus allowing
+ * other consumers to change its contents. Set the
+ * space_map's sm_condensing flag to ensure that
+ * allocations on this metaslab do not occur while we're
+ * in the middle of committing it to disk. This is only critical
+ * for the ms_map as all other space_maps use per txg
+ * views of their content.
+ */
+ sm->sm_condensing = B_TRUE;
+
+ mutex_exit(&msp->ms_lock);
+ space_map_truncate(smo, mos, tx);
+ mutex_enter(&msp->ms_lock);
+
+ /*
+ * While we would ideally like to create a space_map representation
+ * that consists only of allocation records, doing so can be
+ * prohibitively expensive because the in-core free map can be
+ * large, and therefore computationally expensive to subtract
+ * from the condense_map. Instead we sync out two maps, a cheap
+ * allocation only map followed by the in-core free map. While not
+ * optimal, this is typically close to optimal, and much cheaper to
+ * compute.
+ */
+ space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx);
+ space_map_vacate(&condense_map, NULL, NULL);
+ space_map_destroy(&condense_map);
+
+ space_map_sync(sm, SM_FREE, smo, mos, tx);
+ sm->sm_condensing = B_FALSE;
+
+ spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, "
+ "smo size %llu", txg,
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
+ smo->smo_objsize);
+}
+
+/*
* Write a metaslab to disk in the context of the specified transaction group.
*/
void
@@ -944,17 +1114,29 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
objset_t *mos = spa_meta_objset(spa);
- space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
- space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- space_map_t *sm = &msp->ms_map;
+ space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK];
+ space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK];
+ space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ space_map_t *sm = msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo_syncing;
dmu_buf_t *db;
dmu_tx_t *tx;
ASSERT(!vd->vdev_ishole);
- if (allocmap->sm_space == 0 && freemap->sm_space == 0)
+ /*
+ * This metaslab has just been added so there's no work to do now.
+ */
+ if (*freemap == NULL) {
+ ASSERT3P(allocmap, ==, NULL);
+ return;
+ }
+
+ ASSERT3P(allocmap, !=, NULL);
+ ASSERT3P(*freemap, !=, NULL);
+ ASSERT3P(*freed_map, !=, NULL);
+
+ if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0)
return;
/*
@@ -982,49 +1164,36 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
mutex_enter(&msp->ms_lock);
- space_map_walk(freemap, space_map_add, freed_map);
-
- if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
- 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
- /*
- * The in-core space map representation is twice as compact
- * as the on-disk one, so it's time to condense the latter
- * by generating a pure allocmap from first principles.
- *
- * This metaslab is 100% allocated,
- * minus the content of the in-core map (sm),
- * minus what's been freed this txg (freed_map),
- * minus deferred frees (ms_defermap[]),
- * minus allocations from txgs in the future
- * (because they haven't been committed yet).
- */
- space_map_vacate(allocmap, NULL, NULL);
- space_map_vacate(freemap, NULL, NULL);
-
- space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);
-
- space_map_walk(sm, space_map_remove, allocmap);
- space_map_walk(freed_map, space_map_remove, allocmap);
+ if (sm->sm_loaded && spa_sync_pass(spa) == 1 &&
+ metaslab_should_condense(msp)) {
+ metaslab_condense(msp, txg, tx);
+ } else {
+ space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
+ space_map_sync(*freemap, SM_FREE, smo, mos, tx);
+ }
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_walk(&msp->ms_defermap[t],
- space_map_remove, allocmap);
+ space_map_vacate(allocmap, NULL, NULL);
- for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
- space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
- space_map_remove, allocmap);
-
- mutex_exit(&msp->ms_lock);
- space_map_truncate(smo, mos, tx);
- mutex_enter(&msp->ms_lock);
+ /*
+ * For sync pass 1, we avoid walking the entire space map and
+ * instead will just swap the pointers for freemap and
+ * freed_map. We can safely do this since the freed_map is
+ * guaranteed to be empty on the initial pass.
+ */
+ if (spa_sync_pass(spa) == 1) {
+ ASSERT0((*freed_map)->sm_space);
+ ASSERT0(avl_numnodes(&(*freed_map)->sm_root));
+ space_map_swap(freemap, freed_map);
+ } else {
+ space_map_vacate(*freemap, space_map_add, *freed_map);
}
- space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
- space_map_sync(freemap, SM_FREE, smo, mos, tx);
+ ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space);
+ ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space);
mutex_exit(&msp->ms_lock);
- VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, sizeof (*smo));
bcopy(smo, db->db_data, sizeof (*smo));
@@ -1042,9 +1211,9 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
{
space_map_obj_t *smo = &msp->ms_smo;
space_map_obj_t *smosync = &msp->ms_smo_syncing;
- space_map_t *sm = &msp->ms_map;
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
- space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
+ space_map_t *sm = msp->ms_map;
+ space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ space_map_t **defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
metaslab_group_t *mg = msp->ms_group;
vdev_t *vd = mg->mg_vd;
int64_t alloc_delta, defer_delta;
@@ -1055,40 +1224,57 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
/*
* If this metaslab is just becoming available, initialize its
- * allocmaps and freemaps and add its capacity to the vdev.
+ * allocmaps, freemaps, and defermap and add its capacity to the vdev.
*/
- if (freed_map->sm_size == 0) {
+ if (*freed_map == NULL) {
+ ASSERT(*defer_map == NULL);
for (int t = 0; t < TXG_SIZE; t++) {
- space_map_create(&msp->ms_allocmap[t], sm->sm_start,
+ msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_allocmap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
- space_map_create(&msp->ms_freemap[t], sm->sm_start,
+ msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_freemap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
}
- for (int t = 0; t < TXG_DEFER_SIZE; t++)
- space_map_create(&msp->ms_defermap[t], sm->sm_start,
+ for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+ msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t),
+ KM_SLEEP);
+ space_map_create(msp->ms_defermap[t], sm->sm_start,
sm->sm_size, sm->sm_shift, sm->sm_lock);
+ }
+
+ freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
+ defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
vdev_space_update(vd, 0, 0, sm->sm_size);
}
alloc_delta = smosync->smo_alloc - smo->smo_alloc;
- defer_delta = freed_map->sm_space - defer_map->sm_space;
+ defer_delta = (*freed_map)->sm_space - (*defer_map)->sm_space;
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
- ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
- ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
+ ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0);
+ ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0);
/*
* If there's a space_map_load() in progress, wait for it to complete
* so that we have a consistent view of the in-core space map.
- * Then, add defer_map (oldest deferred frees) to this map and
- * transfer freed_map (this txg's frees) to defer_map.
*/
space_map_load_wait(sm);
- space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
- space_map_vacate(freed_map, space_map_add, defer_map);
+
+ /*
+ * Move the frees from the defer_map to this map (if it's loaded).
+ * Swap the freed_map and the defer_map -- this is safe to do
+ * because we've just emptied out the defer_map.
+ */
+ space_map_vacate(*defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
+ ASSERT0((*defer_map)->sm_space);
+ ASSERT0(avl_numnodes(&(*defer_map)->sm_root));
+ space_map_swap(freed_map, defer_map);
*smo = *smosync;
@@ -1112,7 +1298,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
int evictable = 1;
for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
- if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
+ if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space)
evictable = 0;
if (evictable && !metaslab_debug)
@@ -1137,7 +1323,7 @@ metaslab_sync_reassess(metaslab_group_t *mg)
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
- if (msp->ms_map.sm_start > mg->mg_bonus_area)
+ if (msp->ms_map->sm_start > mg->mg_bonus_area)
break;
mutex_enter(&msp->ms_lock);
@@ -1158,7 +1344,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
{
uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
- uint64_t start = msp->ms_map.sm_start >> ms_shift;
+ uint64_t start = msp->ms_map->sm_start >> ms_shift;
if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
return (1ULL << 63);
@@ -1206,6 +1392,13 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
mutex_exit(&mg->mg_lock);
return (-1ULL);
}
+
+ /*
+ * If the selected metaslab is condensing, skip it.
+ */
+ if (msp->ms_map->sm_condensing)
+ continue;
+
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
break;
@@ -1271,20 +1464,30 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
continue;
}
- if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
+ /*
+ * If this metaslab is currently condensing then pick again as
+ * we can't manipulate this metaslab until it's committed
+ * to disk.
+ */
+ if (msp->ms_map->sm_condensing) {
+ mutex_exit(&msp->ms_lock);
+ continue;
+ }
+
+ if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL)
break;
atomic_inc_64(&mg->mg_alloc_failures);
- metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
+ metaslab_passivate(msp, space_map_maxsize(msp->ms_map));
mutex_exit(&msp->ms_lock);
}
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize);
mutex_exit(&msp->ms_lock);
@@ -1516,13 +1719,13 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
mutex_enter(&msp->ms_lock);
if (now) {
- space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
+ space_map_remove(msp->ms_allocmap[txg & TXG_MASK],
offset, size);
- space_map_free(&msp->ms_map, offset, size);
+ space_map_free(msp->ms_map, offset, size);
} else {
- if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
+ space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size);
}
mutex_exit(&msp->ms_lock);
@@ -1557,10 +1760,10 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
mutex_enter(&msp->ms_lock);
- if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
+ if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded)
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
- if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
+ if (error == 0 && !space_map_contains(msp->ms_map, offset, size))
error = ENOENT;
if (error || txg == 0) { /* txg == 0 indicates dry run */
@@ -1568,12 +1771,12 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
return (error);
}
- space_map_claim(&msp->ms_map, offset, size);
+ space_map_claim(msp->ms_map, offset, size);
if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size);
}
mutex_exit(&msp->ms_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
index bebb0f3..190fefe 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
@@ -114,6 +114,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
int merge_before, merge_after;
ASSERT(MUTEX_HELD(sm->sm_lock));
+ VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY3U(start, >=, sm->sm_start);
VERIFY3U(end, <=, sm->sm_start + sm->sm_size);
@@ -198,6 +199,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
int left_over, right_over;
ASSERT(MUTEX_HELD(sm->sm_lock));
+ VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
@@ -267,6 +269,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
}
void
+space_map_swap(space_map_t **msrc, space_map_t **mdst)
+{
+ space_map_t *sm;
+
+ ASSERT(MUTEX_HELD((*msrc)->sm_lock));
+ ASSERT0((*mdst)->sm_space);
+ ASSERT0(avl_numnodes(&(*mdst)->sm_root));
+
+ sm = *msrc;
+ *msrc = *mdst;
+ *mdst = sm;
+}
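space_map_swap() gives metaslab_sync() and metaslab_sync_done() an O(1) handoff in place of walking every segment with space_map_vacate(); the precondition, enforced by the ASSERT0 checks, is that the destination map is already empty. A simplified sketch of the contract (toy types, not the kernel API):

#include <assert.h>

struct map { unsigned space; };

static void
map_swap(struct map **src, struct map **dst)
{
	struct map *tmp;

	assert((*dst)->space == 0);	/* destination must be empty */
	tmp = *src;
	*src = *dst;
	*dst = tmp;
}

int
main(void)
{
	struct map freemap = { 42 }, freed_map = { 0 };
	struct map *fm = &freemap, *fd = &freed_map;

	/* O(1) handoff of this txg's frees in sync pass 1 */
	map_swap(&fm, &fd);
	assert(fm->space == 0 && fd->space == 42);
	return (0);
}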
+
+void
space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
{
space_seg_t *ss;
@@ -447,9 +463,9 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
{
spa_t *spa = dmu_objset_spa(os);
- void *cookie = NULL;
+ avl_tree_t *t = &sm->sm_root;
space_seg_t *ss;
- uint64_t bufsize, start, size, run_len, delta, sm_space;
+ uint64_t bufsize, start, size, run_len, total, sm_space, nodes;
uint64_t *entry, *entry_map, *entry_map_end;
ASSERT(MUTEX_HELD(sm->sm_lock));
@@ -478,13 +494,14 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
- delta = 0;
+ total = 0;
+ nodes = avl_numnodes(&sm->sm_root);
sm_space = sm->sm_space;
- while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
+ for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) {
size = ss->ss_end - ss->ss_start;
start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
- delta += size;
+ total += size;
size >>= sm->sm_shift;
while (size) {
@@ -506,7 +523,6 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
start += run_len;
size -= run_len;
}
- kmem_cache_free(space_seg_cache, ss);
}
if (entry != entry_map) {
@@ -522,12 +538,11 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
* Ensure that the space_map's accounting wasn't changed
* while we were in the middle of writing it out.
*/
+ VERIFY3U(nodes, ==, avl_numnodes(&sm->sm_root));
VERIFY3U(sm->sm_space, ==, sm_space);
+ VERIFY3U(sm->sm_space, ==, total);
zio_buf_free(entry_map, bufsize);
-
- sm->sm_space -= delta;
- VERIFY0(sm->sm_space);
}
void
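Note the behavioral change in this hunk: space_map_sync() used to empty the map as it wrote it out (avl_destroy_nodes() plus the final sm_space adjustment), whereas it now walks the tree read-only and verifies that neither the node count nor the accounted space changed while the buffer was being written. Callers that relied on the implicit emptying must now vacate explicitly, which is what the one-line vdev.c change below adds. A simplified sketch of the new pattern, using a plain list in place of the kernel AVL tree:

#include <assert.h>
#include <stddef.h>

struct seg { struct seg *next; unsigned len; };

/*
 * Walk without destroying nodes and verify the accounting afterwards,
 * mirroring the VERIFY3U checks added above.
 */
static unsigned
sync_segments(const struct seg *head, unsigned sm_space)
{
	const struct seg *s;
	unsigned total = 0;

	for (s = head; s != NULL; s = s->next)
		total += s->len;	/* write out an entry for s here */
	assert(total == sm_space);	/* map unchanged during the sync */
	return (total);
}

int
main(void)
{
	struct seg b = { NULL, 30 }, a = { &b, 12 };

	assert(sync_segments(&a, 42) == 42);
	return (0);
}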
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
index f1f1b38..138e14e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -66,20 +66,38 @@ struct metaslab_group {
};
/*
- * Each metaslab's free space is tracked in space map object in the MOS,
- * which is only updated in syncing context. Each time we sync a txg,
+ * Each metaslab maintains an in-core free map (ms_map) that contains the
+ * current list of free segments. As blocks are allocated, the allocated
+ * segment is removed from the ms_map and added to a per txg allocation map.
+ * As blocks are freed, they are added to the per txg free map. These per
+ * txg maps allow us to process all allocations and frees in syncing context
+ * where it is safe to update the on-disk space maps.
+ *
+ * Each metaslab's free space is tracked in a space map object in the MOS,
+ * which is only updated in syncing context. Each time we sync a txg,
* we append the allocs and frees from that txg to the space map object.
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
- * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
+ * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
+ *
+ * To load the in-core free map we read the space map object from disk.
+ * This object contains a series of alloc and free records that are
+ * combined to make up the list of all free segments in this metaslab. These
+ * segments are represented in-core by the ms_map and are stored in an
+ * AVL tree.
+ *
+ * As the space map object grows (as a result of the appends) it will
+ * eventually become space-inefficient. When the space map object is
+ * zfs_condense_pct/100 times the size of the minimal on-disk representation,
+ * we rewrite it in its minimized form.
*/
struct metaslab {
kmutex_t ms_lock; /* metaslab lock */
space_map_obj_t ms_smo; /* synced space map object */
space_map_obj_t ms_smo_syncing; /* syncing space map object */
- space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
- space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
- space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
- space_map_t ms_map; /* in-core free space map */
+ space_map_t *ms_allocmap[TXG_SIZE]; /* allocated this txg */
+ space_map_t *ms_freemap[TXG_SIZE]; /* freed this txg */
+ space_map_t *ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
+ space_map_t *ms_map; /* in-core free space map */
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
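A minimal model of the per-txg flow described in the comment above, with hypothetical numbers (the real maps track segments in AVL trees, not counters):

#include <stdint.h>
#include <stdio.h>

#define TXG_SIZE	4	/* assumed, as in the real headers */
#define TXG_MASK	(TXG_SIZE - 1)

int
main(void)
{
	uint64_t ms_map = 100;			/* in-core free space */
	uint64_t allocmap[TXG_SIZE] = { 0 };	/* allocated this txg */
	uint64_t freemap[TXG_SIZE] = { 0 };	/* freed this txg */
	uint64_t txg = 7;

	/* open context: an allocation leaves ms_map immediately... */
	ms_map -= 10;
	allocmap[txg & TXG_MASK] += 10;

	/*
	 * ...but a free only enters the per-txg freemap; it rejoins
	 * ms_map (via the defer maps) in syncing context.
	 */
	freemap[txg & TXG_MASK] += 4;

	printf("txg %ju: alloc=%ju free=%ju in-core=%ju\n",
	    (uintmax_t)txg, (uintmax_t)allocmap[txg & TXG_MASK],
	    (uintmax_t)freemap[txg & TXG_MASK], (uintmax_t)ms_map);
	return (0);
}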
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
index 463b6bb..2da50fb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h
@@ -40,17 +40,17 @@ extern "C" {
typedef struct space_map_ops space_map_ops_t;
typedef struct space_map {
- avl_tree_t sm_root; /* AVL tree of map segments */
+ avl_tree_t sm_root; /* offset-ordered segment AVL tree */
uint64_t sm_space; /* sum of all segments in the map */
uint64_t sm_start; /* start of map */
uint64_t sm_size; /* size of map */
uint8_t sm_shift; /* unit shift */
- uint8_t sm_pad[3]; /* unused */
uint8_t sm_loaded; /* map loaded? */
uint8_t sm_loading; /* map loading? */
+ uint8_t sm_condensing; /* map condensing? */
kcondvar_t sm_load_cv; /* map load completion */
space_map_ops_t *sm_ops; /* space map block picker ops vector */
- avl_tree_t *sm_pp_root; /* picker-private AVL tree */
+ avl_tree_t *sm_pp_root; /* size-ordered, picker-private tree */
void *sm_ppd; /* picker-private data */
kmutex_t *sm_lock; /* pointer to lock that protects map */
} space_map_t;
@@ -149,6 +149,7 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern boolean_t space_map_contains(space_map_t *sm,
uint64_t start, uint64_t size);
+extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_walk(space_map_t *sm,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 59b461b..be5b0bf 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -1847,6 +1847,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
space_map_truncate(smo, mos, tx);
space_map_sync(&smsync, SM_ALLOC, smo, mos, tx);
+ space_map_vacate(&smsync, NULL, NULL);
space_map_destroy(&smsync);
diff --git a/sys/dev/ath/ath_hal/ah.c b/sys/dev/ath/ath_hal/ah.c
index d1ce7a8..551c225 100644
--- a/sys/dev/ath/ath_hal/ah.c
+++ b/sys/dev/ath/ath_hal/ah.c
@@ -692,6 +692,10 @@ ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type,
return pCap->hal4AddrAggrSupport ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_EXT_CHAN_DFS:
return pCap->halExtChanDfsSupport ? HAL_OK : HAL_ENOTSUPP;
+ case HAL_CAP_RX_STBC:
+ return pCap->halRxStbcSupport ? HAL_OK : HAL_ENOTSUPP;
+ case HAL_CAP_TX_STBC:
+ return pCap->halTxStbcSupport ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_COMBINED_RADAR_RSSI:
return pCap->halUseCombinedRadarRssi ? HAL_OK : HAL_ENOTSUPP;
case HAL_CAP_AUTO_SLEEP:
diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h
index 0e3d5ab..ca2e7ca 100644
--- a/sys/dev/ath/ath_hal/ah.h
+++ b/sys/dev/ath/ath_hal/ah.h
@@ -137,6 +137,9 @@ typedef enum {
HAL_CAP_RIFS_RX_ENABLED = 53,
HAL_CAP_BB_DFS_HANG = 54,
+ HAL_CAP_RX_STBC = 58,
+ HAL_CAP_TX_STBC = 59,
+
HAL_CAP_BT_COEX = 60, /* hardware is capable of bluetooth coexistence */
HAL_CAP_DYNAMIC_SMPS = 61, /* Dynamic MIMO Power Save hardware support */
diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c
index a7d6af6..b3f82fa 100644
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@@ -708,71 +708,6 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an,
s3code, sched->t3); /* series 3 */
}
-/*
- * Update the EWMA percentage.
- *
- * This is a simple hack to track an EWMA based on the current
- * rate scenario. For the rate codes which failed, this will
- * record a 0% against it. For the rate code which succeeded,
- * EWMA will record the nbad*100/nframes percentage against it.
- */
-static void
-update_ewma_stats(struct ath_softc *sc, struct ath_node *an,
- int frame_size,
- int rix0, int tries0,
- int rix1, int tries1,
- int rix2, int tries2,
- int rix3, int tries3,
- int short_tries, int tries, int status,
- int nframes, int nbad)
-{
- struct sample_node *sn = ATH_NODE_SAMPLE(an);
- struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc);
- const int size_bin = size_to_bin(frame_size);
- int tries_so_far;
- int pct;
- int rix = rix0;
-
- /* Calculate percentage based on current rate */
- if (nframes == 0)
- nframes = nbad = 1;
- pct = ((nframes - nbad) * 1000) / nframes;
-
- /* Figure out which rate index succeeded */
- tries_so_far = tries0;
-
- if (tries1 && tries_so_far < tries) {
- tries_so_far += tries1;
- rix = rix1;
- /* XXX bump ewma pct */
- }
-
- if (tries2 && tries_so_far < tries) {
- tries_so_far += tries2;
- rix = rix2;
- /* XXX bump ewma pct */
- }
-
- if (tries3 && tries_so_far < tries) {
- rix = rix3;
- /* XXX bump ewma pct */
- }
-
- /* rix is the successful rate, update EWMA for final rix */
- if (sn->stats[size_bin][rix].total_packets <
- ssc->smoothing_minpackets) {
- /* just average the first few packets */
- int a_pct = (sn->stats[size_bin][rix].packets_acked * 1000) /
- (sn->stats[size_bin][rix].total_packets);
- sn->stats[size_bin][rix].ewma_pct = a_pct;
- } else {
- /* use a ewma */
- sn->stats[size_bin][rix].ewma_pct =
- ((sn->stats[size_bin][rix].ewma_pct * ssc->smoothing_rate) +
- (pct * (100 - ssc->smoothing_rate))) / 100;
- }
-}
-
static void
update_stats(struct ath_softc *sc, struct ath_node *an,
int frame_size,
@@ -792,6 +727,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
const int size = bin_to_size(size_bin);
int tt, tries_so_far;
int is_ht40 = (an->an_node.ni_chw == 40);
+ int pct;
if (!IS_RATE_DEFINED(sn, rix0))
return;
@@ -865,6 +801,27 @@ update_stats(struct ath_softc *sc, struct ath_node *an,
sn->stats[size_bin][rix0].last_tx = ticks;
sn->stats[size_bin][rix0].total_packets += nframes;
+ /* update EWMA for this rix */
+
+ /* Calculate percentage based on current rate */
+ if (nframes == 0)
+ nframes = nbad = 1;
+ pct = ((nframes - nbad) * 1000) / nframes;
+
+ if (sn->stats[size_bin][rix0].total_packets <
+ ssc->smoothing_minpackets) {
+ /* just average the first few packets */
+ int a_pct = (sn->stats[size_bin][rix0].packets_acked * 1000) /
+ (sn->stats[size_bin][rix0].total_packets);
+ sn->stats[size_bin][rix0].ewma_pct = a_pct;
+ } else {
+ /* use a ewma */
+ sn->stats[size_bin][rix0].ewma_pct =
+ ((sn->stats[size_bin][rix0].ewma_pct * ssc->smoothing_rate) +
+ (pct * (100 - ssc->smoothing_rate))) / 100;
+ }
+
+
if (rix0 == sn->current_sample_rix[size_bin]) {
IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL,
&an->an_node,
@@ -907,6 +864,11 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries = ts->ts_shortretry;
long_tries = ts->ts_longretry + 1;
+ if (nframes == 0) {
+ device_printf(sc->sc_dev, "%s: nframes=0?\n", __func__);
+ return;
+ }
+
if (frame_size == 0) /* NB: should not happen */
frame_size = 1500;
@@ -950,13 +912,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
0, 0,
short_tries, long_tries, status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- final_rix, long_tries,
- 0, 0,
- 0, 0,
- 0, 0,
- short_tries, long_tries, status,
- nframes, nbad);
} else {
int finalTSIdx = ts->ts_finaltsi;
@@ -1008,15 +963,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
long_tries > rc[0].tries,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[0].rix, rc[0].tries,
- rc[1].rix, rc[1].tries,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- short_tries, long_tries,
- long_tries > rc[0].tries,
- nframes, nbad);
-
long_tries -= rc[0].tries;
}
@@ -1029,14 +975,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[1].rix, rc[1].tries,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
long_tries -= rc[1].tries;
}
@@ -1049,14 +987,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[2].rix, rc[2].tries,
- rc[3].rix, rc[3].tries,
- 0, 0,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
long_tries -= rc[2].tries;
}
@@ -1069,14 +999,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an,
short_tries, long_tries,
status,
nframes, nbad);
- update_ewma_stats(sc, an, frame_size,
- rc[3].rix, rc[3].tries,
- 0, 0,
- 0, 0,
- 0, 0,
- short_tries, long_tries,
- status,
- nframes, nbad);
}
}
}
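The EWMA update folded into update_stats() above uses the driver's fixed-point convention of percentages scaled by 10 (1000 = 100.0%). A standalone sketch of the smoothing step; smoothing_rate = 95 is assumed to match the driver's default:

#include <stdio.h>

int
main(void)
{
	int smoothing_rate = 95;	/* assumed driver default */
	int ewma = 1000;		/* start at 100.0% */
	/* per-completion pct = (nframes - nbad) * 1000 / nframes */
	int samples[] = { 1000, 500, 0, 1000 };
	int i;

	for (i = 0; i < 4; i++) {
		ewma = ((ewma * smoothing_rate) +
		    (samples[i] * (100 - smoothing_rate))) / 100;
		printf("sample %4d -> ewma %d.%d%%\n",
		    samples[i], ewma / 10, ewma % 10);
	}
	return (0);
}

Each completion moves the estimate by only (100 - smoothing_rate)% of the new sample, so a single bad aggregate cannot crater a rate's score.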
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c
index a614d6f..fd1a7c3 100644
--- a/sys/dev/ath/if_ath.c
+++ b/sys/dev/ath/if_ath.c
@@ -781,6 +781,28 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
ic->ic_txstream = txs;
ic->ic_rxstream = rxs;
+ /*
+ * Set up TX and RX STBC based on what the HAL allows and
+ * the currently configured chainmask set.
+ * I.e., don't enable STBC TX if only one chain is enabled.
+ * STBC RX is fine on a single RX chain; it just won't
+ * provide any real benefit.
+ */
+ if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0,
+ NULL) == HAL_OK) {
+ sc->sc_rx_stbc = 1;
+ device_printf(sc->sc_dev,
+ "[HT] 1 stream STBC receive enabled\n");
+ ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM;
+ }
+ if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0,
+ NULL) == HAL_OK) {
+ sc->sc_tx_stbc = 1;
+ device_printf(sc->sc_dev,
+ "[HT] 1 stream STBC transmit enabled\n");
+ ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC;
+ }
+
(void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1,
&sc->sc_rts_aggr_limit);
if (sc->sc_rts_aggr_limit != (64 * 1024))
diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c
index c0e72ac..d382f8f 100644
--- a/sys/dev/ath/if_ath_tx_ht.c
+++ b/sys/dev/ath/if_ath_tx_ht.c
@@ -536,16 +536,29 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni,
series[i].RateFlags |= HAL_RATESERIES_HALFGI;
/*
- * XXX TODO: STBC if it's possible
+ * Setup rate and TX power cap for this series.
*/
+ series[i].Rate = rt->info[rc[i].rix].rateCode;
+ series[i].RateIndex = rc[i].rix;
+ series[i].tx_power_cap = 0x3f; /* XXX for now */
+
/*
- * XXX TODO: LDPC if it's possible
+ * If we have STBC TX enabled and the receiver
+ * can receive (at least) 1 stream STBC, AND it's
+ * MCS 0-7, AND we have at least two chains enabled,
+ * enable STBC.
*/
+ if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC &&
+ ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM &&
+ (sc->sc_cur_txchainmask > 1) &&
+ HT_RC_2_STREAMS(series[i].Rate) == 1) {
+ series[i].RateFlags |= HAL_RATESERIES_STBC;
+ }
- series[i].Rate = rt->info[rc[i].rix].rateCode;
- series[i].RateIndex = rc[i].rix;
- series[i].tx_power_cap = 0x3f; /* XXX for now */
+ /*
+ * XXX TODO: LDPC if it's possible
+ */
/*
* PktDuration doesn't include slot, ACK, RTS, etc timing -
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index e8fdeff..42442de 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -567,7 +567,9 @@ struct ath_softc {
/*
* Second set of flags.
*/
- u_int32_t sc_use_ent : 1;
+ u_int32_t sc_use_ent : 1,
+ sc_rx_stbc : 1,
+ sc_tx_stbc : 1;
/*
* Enterprise mode configuration for AR9380 and later chipsets.
diff --git a/sys/dev/mfi/mfi.c b/sys/dev/mfi/mfi.c
index ed759fc..e799b9d 100644
--- a/sys/dev/mfi/mfi.c
+++ b/sys/dev/mfi/mfi.c
@@ -108,6 +108,7 @@ static void mfi_bio_complete(struct mfi_command *);
static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*);
static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*);
static int mfi_send_frame(struct mfi_softc *, struct mfi_command *);
+static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *);
static int mfi_abort(struct mfi_softc *, struct mfi_command **);
static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *);
static void mfi_timeout(void *);
@@ -132,24 +133,30 @@ static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm);
SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters");
static int mfi_event_locale = MFI_EVT_LOCALE_ALL;
TUNABLE_INT("hw.mfi.event_locale", &mfi_event_locale);
-SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RW, &mfi_event_locale,
- 0, "event message locale");
+SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale,
+ 0, "event message locale");
static int mfi_event_class = MFI_EVT_CLASS_INFO;
TUNABLE_INT("hw.mfi.event_class", &mfi_event_class);
-SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RW, &mfi_event_class,
- 0, "event message class");
+SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class,
+ 0, "event message class");
static int mfi_max_cmds = 128;
TUNABLE_INT("hw.mfi.max_cmds", &mfi_max_cmds);
-SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RD, &mfi_max_cmds,
- 0, "Max commands");
+SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds,
+ 0, "Max commands limit (-1 = controller limit)");
static int mfi_detect_jbod_change = 1;
TUNABLE_INT("hw.mfi.detect_jbod_change", &mfi_detect_jbod_change);
-SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RW,
+SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN,
&mfi_detect_jbod_change, 0, "Detect a change to a JBOD");
+int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS;
+TUNABLE_INT("hw.mfi.polled_cmd_timeout", &mfi_polled_cmd_timeout);
+SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN,
+ &mfi_polled_cmd_timeout, 0,
+ "Polled command timeout - used for firmware flash etc (in seconds)");
+
/* Management interface */
static d_open_t mfi_open;
static d_close_t mfi_close;
@@ -361,7 +368,7 @@ mfi_attach(struct mfi_softc *sc)
{
uint32_t status;
int error, commsz, framessz, sensesz;
- int frames, unit, max_fw_sge;
+ int frames, unit, max_fw_sge, max_fw_cmds;
uint32_t tb_mem_size = 0;
if (sc == NULL)
@@ -456,7 +463,14 @@ mfi_attach(struct mfi_softc *sc)
* instead of compile time.
*/
status = sc->mfi_read_fw_status(sc);
- sc->mfi_max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK;
+ max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK;
+ if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) {
+ device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n",
+ max_fw_cmds, mfi_max_cmds);
+ sc->mfi_max_fw_cmds = mfi_max_cmds;
+ } else {
+ sc->mfi_max_fw_cmds = max_fw_cmds;
+ }
max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16;
sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1));
@@ -464,7 +478,8 @@ mfi_attach(struct mfi_softc *sc)
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
mfi_tbolt_init_globals(sc);
- device_printf(sc->mfi_dev, "MaxCmd = %x MaxSgl = %x state = %x \n",
+ device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, "
+ "MaxSgl = %d, state = %#x\n", max_fw_cmds,
sc->mfi_max_fw_cmds, sc->mfi_max_sge, status);
tb_mem_size = mfi_tbolt_get_memory_requirement(sc);
@@ -503,8 +518,8 @@ mfi_attach(struct mfi_softc *sc)
0, /* flags */
NULL, NULL, /* lockfunc, lockarg */
&sc->mfi_tb_init_dmat)) {
- device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n");
- return (ENOMEM);
+ device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n");
+ return (ENOMEM);
}
if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init,
BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) {
@@ -683,11 +698,14 @@ mfi_attach(struct mfi_softc *sc)
/* ThunderBolt MFI_IOC2 INIT */
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
sc->mfi_disable_intr(sc);
+ mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) {
device_printf(sc->mfi_dev,
"TB Init has failed with error %d\n",error);
+ mtx_unlock(&sc->mfi_io_lock);
return error;
}
+ mtx_unlock(&sc->mfi_io_lock);
if ((error = mfi_tbolt_alloc_cmd(sc)) != 0)
return error;
@@ -723,10 +741,12 @@ mfi_attach(struct mfi_softc *sc)
"hook\n");
return (EINVAL);
}
+ mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_aen_setup(sc, 0)) != 0) {
mtx_unlock(&sc->mfi_io_lock);
return (error);
}
+ mtx_unlock(&sc->mfi_io_lock);
/*
* Register a shutdown handler.
@@ -766,7 +786,9 @@ mfi_attach(struct mfi_softc *sc)
mfi_timeout, sc);
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
+ mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_sync_map_info(sc);
+ mtx_unlock(&sc->mfi_io_lock);
}
return (0);
@@ -776,21 +798,16 @@ static int
mfi_alloc_commands(struct mfi_softc *sc)
{
struct mfi_command *cm;
- int i, ncmds;
+ int i, j;
/*
* XXX Should we allocate all the commands up front, or allocate on
* demand later like 'aac' does?
*/
- ncmds = MIN(mfi_max_cmds, sc->mfi_max_fw_cmds);
- if (bootverbose)
- device_printf(sc->mfi_dev, "Max fw cmds= %d, sizing driver "
- "pool to %d\n", sc->mfi_max_fw_cmds, ncmds);
-
- sc->mfi_commands = malloc(sizeof(struct mfi_command) * ncmds, M_MFIBUF,
- M_WAITOK | M_ZERO);
+ sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) *
+ sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO);
- for (i = 0; i < ncmds; i++) {
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
cm = &sc->mfi_commands[i];
cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames +
sc->mfi_cmd_size * i);
@@ -806,10 +823,20 @@ mfi_alloc_commands(struct mfi_softc *sc)
mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
+ } else {
+ device_printf(sc->mfi_dev, "Failed to allocate %d "
+ "command blocks, only allocated %d\n",
+ sc->mfi_max_fw_cmds, i);
+ for (j = 0; j < i; j++) {
+ cm = &sc->mfi_commands[j];
+ bus_dmamap_destroy(sc->mfi_buffer_dmat,
+ cm->cm_dmamap);
+ }
+ free(sc->mfi_commands, M_MFIBUF);
+ sc->mfi_commands = NULL;
+
+ return (ENOMEM);
}
- else
- break;
- sc->mfi_total_cmds++;
}
return (0);
@@ -834,6 +861,29 @@ mfi_release_command(struct mfi_command *cm)
cm->cm_sg->sg32[0].addr = 0;
}
+ /*
+ * Command may be on other queues, e.g. the busy queue, depending on
+ * the flow of a previous call to mfi_mapcmd, so ensure it's dequeued
+ * properly.
+ */
+ if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cm);
+ if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0)
+ mfi_remove_ready(cm);
+
+ /* We're not expecting it to be on any other queue but check */
+ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) {
+ panic("Command %p is still on another queue, flags = %#x",
+ cm, cm->cm_flags);
+ }
+
+ /* tbolt cleanup */
+ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) {
+ mfi_tbolt_return_cmd(cm->cm_sc,
+ cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1],
+ cm);
+ }
+
hdr_data = (uint32_t *)cm->cm_frame;
hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */
hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */
@@ -916,8 +966,10 @@ mfi_comms_init(struct mfi_softc *sc)
uint32_t context = 0;
mtx_lock(&sc->mfi_io_lock);
- if ((cm = mfi_dequeue_free(sc)) == NULL)
+ if ((cm = mfi_dequeue_free(sc)) == NULL) {
+ mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
+ }
/* Zero out the MFI frame */
context = cm->cm_frame->header.context;
@@ -946,15 +998,12 @@ mfi_comms_init(struct mfi_softc *sc)
cm->cm_data = NULL;
cm->cm_flags = MFI_CMD_POLLED;
- if ((error = mfi_mapcmd(sc, cm)) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
device_printf(sc->mfi_dev, "failed to send init command\n");
- mtx_unlock(&sc->mfi_io_lock);
- return (error);
- }
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
- return (0);
+ return (error);
}
static int
@@ -1005,7 +1054,7 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state)
struct mfi_command *cm = NULL;
int error;
- mtx_lock(&sc->mfi_io_lock);
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO,
(void **)log_state, sizeof(**log_state));
if (error)
@@ -1024,7 +1073,6 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state)
out:
if (cm)
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
return (error);
}
@@ -1037,32 +1085,32 @@ mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start)
int error = 0;
uint32_t seq;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
class_locale.members.reserved = 0;
class_locale.members.locale = mfi_event_locale;
class_locale.members.evt_class = mfi_event_class;
if (seq_start == 0) {
- error = mfi_get_log_state(sc, &log_state);
+ if ((error = mfi_get_log_state(sc, &log_state)) != 0)
+ goto out;
sc->mfi_boot_seq_num = log_state->boot_seq_num;
- if (error) {
- if (log_state)
- free(log_state, M_MFIBUF);
- return (error);
- }
/*
* Walk through any events that fired since the last
* shutdown.
*/
- mfi_parse_entries(sc, log_state->shutdown_seq_num,
- log_state->newest_seq_num);
+ if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num,
+ log_state->newest_seq_num)) != 0)
+ goto out;
seq = log_state->newest_seq_num;
} else
seq = seq_start;
- mfi_aen_register(sc, seq, class_locale.word);
+ error = mfi_aen_register(sc, seq, class_locale.word);
+out:
free(log_state, M_MFIBUF);
- return 0;
+ return (error);
}
int
@@ -1072,7 +1120,6 @@ mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm)
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
cm->cm_complete = NULL;
-
/*
* MegaCli can issue a DCMD of 0. In this case do nothing
* and return 0 to it as status
@@ -1100,12 +1147,13 @@ mfi_free(struct mfi_softc *sc)
if (sc->mfi_cdev != NULL)
destroy_dev(sc->mfi_cdev);
- if (sc->mfi_total_cmds != 0) {
- for (i = 0; i < sc->mfi_total_cmds; i++) {
+ if (sc->mfi_commands != NULL) {
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
cm = &sc->mfi_commands[i];
bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap);
}
free(sc->mfi_commands, M_MFIBUF);
+ sc->mfi_commands = NULL;
}
if (sc->mfi_intr)
@@ -1161,7 +1209,8 @@ mfi_free(struct mfi_softc *sc)
/* End LSIP200113393 */
/* ThunderBolt INIT packet memory Free */
if (sc->mfi_tb_init_busaddr != 0)
- bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap);
+ bus_dmamap_unload(sc->mfi_tb_init_dmat,
+ sc->mfi_tb_init_dmamap);
if (sc->mfi_tb_init != NULL)
bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init,
sc->mfi_tb_init_dmamap);
@@ -1178,16 +1227,14 @@ mfi_free(struct mfi_softc *sc)
sc->mfi_tb_ioc_init_dmamap);
if (sc->mfi_tb_ioc_init_dmat != NULL)
bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat);
- for (int i = 0; i < sc->mfi_max_fw_cmds; i++) {
- if (sc->mfi_cmd_pool_tbolt != NULL) {
+ if (sc->mfi_cmd_pool_tbolt != NULL) {
+ for (int i = 0; i < sc->mfi_max_fw_cmds; i++) {
if (sc->mfi_cmd_pool_tbolt[i] != NULL) {
free(sc->mfi_cmd_pool_tbolt[i],
M_MFIBUF);
sc->mfi_cmd_pool_tbolt[i] = NULL;
}
}
- }
- if (sc->mfi_cmd_pool_tbolt != NULL) {
free(sc->mfi_cmd_pool_tbolt, M_MFIBUF);
sc->mfi_cmd_pool_tbolt = NULL;
}
@@ -1252,16 +1299,14 @@ restart:
cm->cm_error = 0;
mfi_complete(sc, cm);
}
- if (++ci == (sc->mfi_max_fw_cmds + 1)) {
+ if (++ci == (sc->mfi_max_fw_cmds + 1))
ci = 0;
- }
}
sc->mfi_comms->hw_ci = ci;
/* Give deferred I/O a chance to run */
- if (sc->mfi_flags & MFI_FLAGS_QFRZN)
- sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
+ sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
mfi_startio(sc);
mtx_unlock(&sc->mfi_io_lock);
@@ -1284,15 +1329,15 @@ mfi_shutdown(struct mfi_softc *sc)
int error;
- if (sc->mfi_aen_cm)
+ if (sc->mfi_aen_cm != NULL) {
sc->cm_aen_abort = 1;
- if (sc->mfi_aen_cm != NULL)
mfi_abort(sc, &sc->mfi_aen_cm);
+ }
- if (sc->mfi_map_sync_cm)
+ if (sc->mfi_map_sync_cm != NULL) {
sc->cm_map_abort = 1;
- if (sc->mfi_map_sync_cm != NULL)
mfi_abort(sc, &sc->mfi_map_sync_cm);
+ }
mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0);
@@ -1306,9 +1351,8 @@ mfi_shutdown(struct mfi_softc *sc)
cm->cm_flags = MFI_CMD_POLLED;
cm->cm_data = NULL;
- if ((error = mfi_mapcmd(sc, cm)) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
device_printf(sc->mfi_dev, "Failed to shutdown controller\n");
- }
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
@@ -1374,8 +1418,10 @@ mfi_syspdprobe(struct mfi_softc *sc)
TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) {
found = 0;
for (i = 0; i < pdlist->count; i++) {
- if (syspd->pd_id == pdlist->addr[i].device_id)
+ if (syspd->pd_id == pdlist->addr[i].device_id) {
found = 1;
+ break;
+ }
}
if (found == 0) {
printf("DELETE\n");
@@ -1628,6 +1674,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
struct mfi_evt_detail *ed = NULL;
int error = 0;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
current_aen.word = locale;
if (sc->mfi_aen_cm != NULL) {
prior_aen.word =
@@ -1646,13 +1694,10 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
}
}
- mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT,
(void **)&ed, sizeof(*ed));
- mtx_unlock(&sc->mfi_io_lock);
- if (error) {
+ if (error)
goto out;
- }
dcmd = &cm->cm_frame->dcmd;
((uint32_t *)&dcmd->mbox)[0] = seq;
@@ -1663,10 +1708,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale)
sc->last_seq_num = seq;
sc->mfi_aen_cm = cm;
- mtx_lock(&sc->mfi_io_lock);
mfi_enqueue_ready(cm);
mfi_startio(sc);
- mtx_unlock(&sc->mfi_io_lock);
out:
return (error);
@@ -1684,11 +1727,11 @@ mfi_aen_complete(struct mfi_command *cm)
sc = cm->cm_sc;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- hdr = &cm->cm_frame->header;
-
if (sc->mfi_aen_cm == NULL)
return;
+ hdr = &cm->cm_frame->header;
+
if (sc->cm_aen_abort ||
hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
sc->cm_aen_abort = 0;
@@ -1714,16 +1757,13 @@ mfi_aen_complete(struct mfi_command *cm)
}
free(cm->cm_data, M_MFIBUF);
- sc->mfi_aen_cm = NULL;
wakeup(&sc->mfi_aen_cm);
+ sc->mfi_aen_cm = NULL;
mfi_release_command(cm);
/* set it up again so the driver can catch more events */
- if (!aborted) {
- mtx_unlock(&sc->mfi_io_lock);
+ if (!aborted)
mfi_aen_setup(sc, seq);
- mtx_lock(&sc->mfi_io_lock);
- }
}
#define MAX_EVENTS 15
@@ -1737,6 +1777,8 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
union mfi_evt class_locale;
int error, i, seq, size;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
class_locale.members.reserved = 0;
class_locale.members.locale = mfi_event_locale;
class_locale.members.evt_class = mfi_event_class;
@@ -1748,13 +1790,10 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
return (ENOMEM);
for (seq = start_seq;;) {
- mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
free(el, M_MFIBUF);
- mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
}
- mtx_unlock(&sc->mfi_io_lock);
dcmd = &cm->cm_frame->dcmd;
bzero(dcmd->mbox, MFI_MBOX_SIZE);
@@ -1770,38 +1809,30 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
cm->cm_data = el;
cm->cm_len = size;
- mtx_lock(&sc->mfi_io_lock);
if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev,
"Failed to get controller entries\n");
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
break;
}
- mtx_unlock(&sc->mfi_io_lock);
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) {
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
break;
}
if (dcmd->header.cmd_status != MFI_STAT_OK) {
device_printf(sc->mfi_dev,
"Error %d fetching controller entries\n",
dcmd->header.cmd_status);
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
+ error = EIO;
break;
}
- mtx_lock(&sc->mfi_io_lock);
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
for (i = 0; i < el->count; i++) {
/*
@@ -1817,15 +1848,13 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq)
else if (el->event[i].seq < start_seq)
break;
}
- mtx_lock(&sc->mfi_io_lock);
mfi_queue_evt(sc, &el->event[i]);
- mtx_unlock(&sc->mfi_io_lock);
}
seq = el->event[el->count - 1].seq + 1;
}
free(el, M_MFIBUF);
- return (0);
+ return (error);
}
static int
@@ -1942,11 +1971,12 @@ static int mfi_add_sys_pd(struct mfi_softc *sc, int id)
dcmd->mbox[0]=id;
dcmd->header.scsi_status = 0;
dcmd->header.pad0 = 0;
- if (mfi_mapcmd(sc, cm) != 0) {
+ if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev,
"Failed to get physical drive info %d\n", id);
free(pd_info, M_MFIBUF);
- return (0);
+ mfi_release_command(cm);
+ return (error);
}
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTREAD);
@@ -2096,6 +2126,8 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio)
int flags = 0, blkcount = 0, readop;
uint8_t cdb_len;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if ((cm = mfi_dequeue_free(sc)) == NULL)
return (NULL);
@@ -2142,6 +2174,7 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio)
cm->cm_sg = &pass->sgl;
cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE;
cm->cm_flags = flags;
+
return (cm);
}
@@ -2154,6 +2187,8 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio)
uint32_t blkcount;
uint32_t context = 0;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if ((cm = mfi_dequeue_free(sc)) == NULL)
return (NULL);
@@ -2195,6 +2230,7 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio)
cm->cm_sg = &io->sgl;
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE;
cm->cm_flags = flags;
+
return (cm);
}
@@ -2212,11 +2248,14 @@ mfi_bio_complete(struct mfi_command *cm)
if ((hdr->cmd_status != MFI_STAT_OK) || (hdr->scsi_status != 0)) {
bio->bio_flags |= BIO_ERROR;
bio->bio_error = EIO;
- device_printf(sc->mfi_dev, "I/O error, status= %d "
- "scsi_status= %d\n", hdr->cmd_status, hdr->scsi_status);
+ device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, "
+ "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status);
mfi_print_sense(cm->cm_sc, cm->cm_sense);
} else if (cm->cm_error != 0) {
bio->bio_flags |= BIO_ERROR;
+ bio->bio_error = cm->cm_error;
+ device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n",
+ cm, cm->cm_error);
}
mfi_release_command(cm);
@@ -2252,6 +2291,7 @@ mfi_startio(struct mfi_softc *sc)
/* Send the command to the controller */
if (mfi_mapcmd(sc, cm) != 0) {
+ device_printf(sc->mfi_dev, "Failed to startio\n");
mfi_requeue_ready(cm);
break;
}
@@ -2280,10 +2320,7 @@ mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm)
return (0);
}
} else {
- if (sc->MFA_enabled)
- error = mfi_tbolt_send_frame(sc, cm);
- else
- error = mfi_send_frame(sc, cm);
+ error = mfi_send_frame(sc, cm);
}
return (error);
@@ -2297,18 +2334,28 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
union mfi_sgl *sgl;
struct mfi_softc *sc;
int i, j, first, dir;
- int sge_size;
+ int sge_size, locked;
cm = (struct mfi_command *)arg;
sc = cm->cm_sc;
hdr = &cm->cm_frame->header;
sgl = cm->cm_sg;
+ /*
+	 * We need to check if we have the lock, as this is an async
+	 * callback: even though our caller mfi_mapcmd asserts that
+	 * it has the lock, there is no guarantee that it hasn't been
+	 * dropped if bus_dmamap_load returned prior to our
+	 * completion.
+ */
+ if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0)
+ mtx_lock(&sc->mfi_io_lock);
+
if (error) {
printf("error %d in callback\n", error);
cm->cm_error = error;
mfi_complete(sc, cm);
- return;
+ goto out;
}
/* Use IEEE sgl only for IO's on a SKINNY controller
* For other commands on a SKINNY controller use either
@@ -2380,10 +2427,17 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs);
cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE;
- if (sc->MFA_enabled)
- mfi_tbolt_send_frame(sc, cm);
- else
- mfi_send_frame(sc, cm);
+ if ((error = mfi_send_frame(sc, cm)) != 0) {
+ printf("error %d in callback from mfi_send_frame\n", error);
+ cm->cm_error = error;
+ mfi_complete(sc, cm);
+ goto out;
+ }
+
+out:
+ /* leave the lock in the state we found it */
+ if (locked == 0)
+ mtx_unlock(&sc->mfi_io_lock);
return;
}
@@ -2391,8 +2445,26 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
static int
mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
{
+ int error;
+
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
+ if (sc->MFA_enabled)
+ error = mfi_tbolt_send_frame(sc, cm);
+ else
+ error = mfi_std_send_frame(sc, cm);
+
+ if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cm);
+
+ return (error);
+}
+
+static int
+mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
+{
struct mfi_frame_header *hdr;
- int tm = MFI_POLL_TIMEOUT_SECS * 1000;
+ int tm = mfi_polled_cmd_timeout * 1000;
hdr = &cm->cm_frame->header;
@@ -2446,6 +2518,7 @@ void
mfi_complete(struct mfi_softc *sc, struct mfi_command *cm)
{
int dir;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) {
dir = 0;
@@ -2473,11 +2546,12 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
{
struct mfi_command *cm;
struct mfi_abort_frame *abort;
- int i = 0;
+ int i = 0, error;
uint32_t context = 0;
mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
+ mtx_unlock(&sc->mfi_io_lock);
return (EBUSY);
}
@@ -2497,7 +2571,8 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
cm->cm_data = NULL;
cm->cm_flags = MFI_CMD_POLLED;
- mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+ device_printf(sc->mfi_dev, "failed to abort command\n");
mfi_release_command(cm);
mtx_unlock(&sc->mfi_io_lock);
@@ -2513,7 +2588,7 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort)
mtx_unlock(&sc->mfi_io_lock);
}
- return (0);
+ return (error);
}
int
@@ -2551,7 +2626,8 @@ mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt,
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT;
- error = mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+		device_printf(sc->mfi_dev, "failed to dump blocks\n");
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
@@ -2594,7 +2670,8 @@ mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt,
cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI;
- error = mfi_mapcmd(sc, cm);
+ if ((error = mfi_mapcmd(sc, cm)) != 0)
+		device_printf(sc->mfi_dev, "failed to dump blocks\n");
bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap);
@@ -3308,8 +3385,10 @@ out:
}
case MFI_SET_AEN:
aen = (struct mfi_ioc_aen *)arg;
+ mtx_lock(&sc->mfi_io_lock);
error = mfi_aen_register(sc, aen->aen_seq_num,
aen->aen_class_locale);
+ mtx_unlock(&sc->mfi_io_lock);
break;
case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */
@@ -3638,7 +3717,7 @@ mfi_dump_all(void)
deadline = time_uptime - MFI_CMD_TIMEOUT;
mtx_lock(&sc->mfi_io_lock);
TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) {
- if (cm->cm_timestamp < deadline) {
+ if (cm->cm_timestamp <= deadline) {
device_printf(sc->mfi_dev,
"COMMAND %p TIMEOUT AFTER %d SECONDS\n",
cm, (int)(time_uptime - cm->cm_timestamp));
@@ -3649,7 +3728,7 @@ mfi_dump_all(void)
#if 0
if (timedout)
- MFI_DUMP_CMDS(SC);
+ MFI_DUMP_CMDS(sc);
#endif
mtx_unlock(&sc->mfi_io_lock);
@@ -3662,7 +3741,7 @@ static void
mfi_timeout(void *data)
{
struct mfi_softc *sc = (struct mfi_softc *)data;
- struct mfi_command *cm;
+ struct mfi_command *cm, *tmp;
time_t deadline;
int timedout = 0;
@@ -3674,10 +3753,10 @@ mfi_timeout(void *data)
}
}
mtx_lock(&sc->mfi_io_lock);
- TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) {
+ TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) {
if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm)
continue;
- if (cm->cm_timestamp < deadline) {
+ if (cm->cm_timestamp <= deadline) {
if (sc->adpreset != 0 && sc->issuepend_done == 0) {
cm->cm_timestamp = time_uptime;
} else {
@@ -3687,6 +3766,13 @@ mfi_timeout(void *data)
);
MFI_PRINT_CMD(cm);
MFI_VALIDATE_CMD(sc, cm);
+ /*
+ * Fail the command instead of leaving it on
+			 * the queue, where it could remain stuck forever.
+ */
+ mfi_remove_busy(cm);
+ cm->cm_error = ETIMEDOUT;
+ mfi_complete(sc, cm);
timedout++;
}
}
@@ -3694,7 +3780,7 @@ mfi_timeout(void *data)
#if 0
if (timedout)
- MFI_DUMP_CMDS(SC);
+ MFI_DUMP_CMDS(sc);
#endif
mtx_unlock(&sc->mfi_io_lock);
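
The mfi_data_cb() hunk above hinges on one subtlety: bus_dmamap_load() may run the callback synchronously, while the caller still holds mfi_io_lock, or later from a different context where it does not. Below is a minimal userspace sketch of that conditional-locking pattern, using pthreads and an explicit owner field in place of mtx_owned(); all names are illustrative, none of this is driver code.

/*
 * Sketch (assumption): conditionally take a lock in a callback that
 * may run with or without the lock already held by this thread.
 */
#include <pthread.h>
#include <stdio.h>

struct softc {
	pthread_mutex_t lock;
	pthread_t owner;	/* valid only while 'owned' is set */
	int owned;
};

static int
lock_owned(struct softc *sc)
{
	/* Meaningful only when asked by the would-be owner thread. */
	return (sc->owned && pthread_equal(sc->owner, pthread_self()));
}

static void
lock_acquire(struct softc *sc)
{
	pthread_mutex_lock(&sc->lock);
	sc->owner = pthread_self();
	sc->owned = 1;
}

static void
lock_release(struct softc *sc)
{
	sc->owned = 0;
	pthread_mutex_unlock(&sc->lock);
}

/* The callback may fire with or without the lock held by this thread. */
static void
data_cb(struct softc *sc)
{
	int locked;

	if ((locked = lock_owned(sc)) == 0)
		lock_acquire(sc);

	/* ...complete the command under the lock... */
	printf("callback ran, lock was %s held\n", locked ? "already" : "not");

	/* Leave the lock in the state we found it. */
	if (locked == 0)
		lock_release(sc);
}

int
main(void)
{
	struct softc sc = { .lock = PTHREAD_MUTEX_INITIALIZER };

	data_cb(&sc);			/* async case: lock not held */
	lock_acquire(&sc);
	data_cb(&sc);			/* sync case: lock already held */
	lock_release(&sc);
	return (0);
}
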
diff --git a/sys/dev/mfi/mfi_cam.c b/sys/dev/mfi/mfi_cam.c
index 325b064..0ea2326 100644
--- a/sys/dev/mfi/mfi_cam.c
+++ b/sys/dev/mfi/mfi_cam.c
@@ -145,6 +145,7 @@ mfip_attach(device_t dev)
MFI_SCSI_MAX_CMDS, sc->devq);
if (sc->sim == NULL) {
cam_simq_free(sc->devq);
+ sc->devq = NULL;
device_printf(dev, "CAM SIM attach failed\n");
return (EINVAL);
}
@@ -155,7 +156,9 @@ mfip_attach(device_t dev)
if (xpt_bus_register(sc->sim, dev, 0) != 0) {
device_printf(dev, "XPT bus registration failed\n");
cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
cam_simq_free(sc->devq);
+ sc->devq = NULL;
mtx_unlock(&mfisc->mfi_io_lock);
return (EINVAL);
}
@@ -187,11 +190,14 @@ mfip_detach(device_t dev)
mtx_lock(&sc->mfi_sc->mfi_io_lock);
xpt_bus_deregister(cam_sim_path(sc->sim));
cam_sim_free(sc->sim, FALSE);
+ sc->sim = NULL;
mtx_unlock(&sc->mfi_sc->mfi_io_lock);
}
- if (sc->devq != NULL)
+ if (sc->devq != NULL) {
cam_simq_free(sc->devq);
+ sc->devq = NULL;
+ }
return (0);
}
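
The mfi_cam.c changes are a classic hardening pattern: clear each pointer immediately after freeing it, so error unwinding in attach and a later detach cannot free the same object twice. A tiny hypothetical sketch of the idea, with plain malloc/free standing in for the CAM allocators:

#include <stdio.h>
#include <stdlib.h>

struct softc {
	void *sim;		/* stands in for the CAM SIM */
	void *devq;		/* stands in for the CAM request queue */
};

/* Safe to call after a partial attach, and safe to call twice. */
static void
teardown(struct softc *sc)
{
	if (sc->sim != NULL) {
		free(sc->sim);
		sc->sim = NULL;		/* never freed again */
	}
	if (sc->devq != NULL) {
		free(sc->devq);
		sc->devq = NULL;
	}
}

int
main(void)
{
	struct softc sc = { malloc(16), malloc(16) };

	teardown(&sc);
	teardown(&sc);			/* idempotent: no double free */
	printf("teardown ran twice without a double free\n");
	return (0);
}
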
diff --git a/sys/dev/mfi/mfi_debug.c b/sys/dev/mfi/mfi_debug.c
index 2e66e19..4aec4f7 100644
--- a/sys/dev/mfi/mfi_debug.c
+++ b/sys/dev/mfi/mfi_debug.c
@@ -57,14 +57,7 @@ __FBSDID("$FreeBSD$");
static void
mfi_print_frame_flags(device_t dev, uint32_t flags)
{
- device_printf(dev, "flags=%b\n", flags,
- "\20"
- "\1NOPOST"
- "\2SGL64"
- "\3SENSE64"
- "\4WRITE"
- "\5READ"
- "\6IEEESGL");
+ device_printf(dev, "flags=%b\n", flags, MFI_FRAME_FMT);
}
static void
@@ -205,16 +198,7 @@ mfi_print_cmd(struct mfi_command *cm)
device_printf(dev, "cm=%p index=%d total_frame_size=%d "
"extra_frames=%d\n", cm, cm->cm_index, cm->cm_total_frame_size,
cm->cm_extra_frames);
- device_printf(dev, "flags=%b\n", cm->cm_flags,
- "\20"
- "\1MAPPED"
- "\2DATAIN"
- "\3DATAOUT"
- "\4COMPLETED"
- "\5POLLED"
- "\6Q_FREE"
- "\7Q_READY"
- "\10Q_BUSY");
+ device_printf(dev, "flags=%b\n", cm->cm_flags, MFI_CMD_FLAGS_FMT);
switch (cm->cm_frame->header.cmd) {
case MFI_CMD_DCMD:
@@ -237,7 +221,7 @@ mfi_dump_cmds(struct mfi_softc *sc)
{
int i;
- for (i = 0; i < sc->mfi_total_cmds; i++)
+ for (i = 0; i < sc->mfi_max_fw_cmds; i++)
mfi_print_generic_frame(sc, &sc->mfi_commands[i]);
}
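
Both mfi_debug.c hunks replace inline %b bit-name strings with the shared MFI_FRAME_FMT and MFI_CMD_FLAGS_FMT macros defined later in this patch. The kernel's printf %b conversion reads such a string as: first byte = numeric base, then pairs of (bit number, bit name), where bit numbers are control characters below 0x20. A small userspace decoder to show the encoding (an illustrative sketch, not the kernel's actual implementation):

#include <stdio.h>

#define MFI_FRAME_FMT "\20" \
	"\1NOPOST" \
	"\2SGL64" \
	"\3SENSE64" \
	"\4WRITE" \
	"\5READ" \
	"\6IEEESGL"

static void
print_b(unsigned int val, const char *fmt)
{
	int bit, set, first = 1;

	fmt++;				/* skip the base byte ("\20" = 16) */
	printf("%#x", val);
	while ((bit = (unsigned char)*fmt++) != '\0') {
		set = (val & (1u << (bit - 1))) != 0;
		if (set)
			putchar(first ? '<' : ',');
		/* The name runs until the next control-character byte. */
		while ((unsigned char)*fmt > ' ') {
			if (set)
				putchar(*fmt);
			fmt++;
		}
		if (set)
			first = 0;
	}
	if (!first)
		putchar('>');
	putchar('\n');
}

int
main(void)
{
	/* Bits 1 and 6 set: prints 0x21<NOPOST,IEEESGL> */
	print_b(0x21, MFI_FRAME_FMT);
	return (0);
}
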
diff --git a/sys/dev/mfi/mfi_tbolt.c b/sys/dev/mfi/mfi_tbolt.c
index cce63c0..9d29ea0 100644
--- a/sys/dev/mfi/mfi_tbolt.c
+++ b/sys/dev/mfi/mfi_tbolt.c
@@ -55,14 +55,12 @@ __FBSDID("$FreeBSD$");
#include <dev/mfi/mfi_ioctl.h>
#include <dev/mfi/mfivar.h>
-struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc);
+struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *);
union mfi_mpi2_request_descriptor *
mfi_tbolt_get_request_descriptor(struct mfi_softc *sc, uint16_t index);
void mfi_tbolt_complete_cmd(struct mfi_softc *sc);
int mfi_tbolt_build_io(struct mfi_softc *sc, struct mfi_command *mfi_cmd,
struct mfi_cmd_tbolt *cmd);
-static inline void mfi_tbolt_return_cmd(struct mfi_softc *sc,
- struct mfi_cmd_tbolt *cmd);
union mfi_mpi2_request_descriptor *mfi_tbolt_build_mpt_cmd(struct mfi_softc
*sc, struct mfi_command *cmd);
uint8_t
@@ -84,6 +82,15 @@ static void mfi_queue_map_sync(struct mfi_softc *sc);
#define MFI_FUSION_ENABLE_INTERRUPT_MASK (0x00000008)
+
+extern int mfi_polled_cmd_timeout;
+static int mfi_fw_reset_test = 0;
+#ifdef MFI_DEBUG
+TUNABLE_INT("hw.mfi.fw_reset_test", &mfi_fw_reset_test);
+SYSCTL_INT(_hw_mfi, OID_AUTO, fw_reset_test, CTLFLAG_RWTUN, &mfi_fw_reset_test,
+ 0, "Force a firmware reset condition");
+#endif
+
void
mfi_tbolt_enable_intr_ppc(struct mfi_softc *sc)
{
@@ -162,14 +169,14 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc)
while (!( HostDiag & DIAG_WRITE_ENABLE)) {
for (i = 0; i < 1000; i++);
HostDiag = (uint32_t)MFI_READ4(sc, MFI_HDR);
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, "
- "hostdiag=%x\n", retry, HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, "
+ "hostdiag=%#x\n", retry, HostDiag);
if (retry++ >= 100)
return 1;
}
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%x\n", HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%#x\n", HostDiag);
MFI_WRITE4(sc, MFI_HDR, (HostDiag | DIAG_RESET_ADAPTER));
@@ -181,8 +188,8 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc)
while (HostDiag & DIAG_RESET_ADAPTER) {
for (i = 0; i < 1000; i++) ;
HostDiag = (uint32_t)MFI_READ4(sc, MFI_RSR);
- device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, "
- "hostdiag=%x\n", retry, HostDiag);
+ device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, "
+ "hostdiag=%#x\n", retry, HostDiag);
if (retry++ >= 1000)
return 1;
@@ -311,6 +318,8 @@ mfi_tbolt_init_desc_pool(struct mfi_softc *sc, uint8_t* mem_location,
sc->sg_frame_busaddr = sc->reply_frame_busaddr + offset;
/* initialize the last_reply_idx to 0 */
sc->last_reply_idx = 0;
+ MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1);
+ MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
offset = (sc->sg_frame_busaddr + (MEGASAS_MAX_SZ_CHAIN_FRAME *
sc->mfi_max_fw_cmds)) - sc->mfi_tb_busaddr;
if (offset > tbolt_contg_length)
@@ -327,30 +336,35 @@ int
mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
{
struct MPI2_IOC_INIT_REQUEST *mpi2IocInit;
- struct mfi_init_frame *mfi_init;
+ struct mfi_init_frame *mfi_init;
uintptr_t offset = 0;
bus_addr_t phyAddress;
MFI_ADDRESS *mfiAddressTemp;
- struct mfi_command *cm;
+ struct mfi_command *cm, cmd_tmp;
int error;
- mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
/* Check if initialization is already completed */
if (sc->MFA_enabled) {
+ device_printf(sc->mfi_dev, "tbolt_init already initialised!\n");
return 1;
}
- mtx_lock(&sc->mfi_io_lock);
if ((cm = mfi_dequeue_free(sc)) == NULL) {
- mtx_unlock(&sc->mfi_io_lock);
+ device_printf(sc->mfi_dev, "tbolt_init failed to get command "
+		    "entry!\n");
return (EBUSY);
}
+
+ cmd_tmp.cm_frame = cm->cm_frame;
+ cmd_tmp.cm_frame_busaddr = cm->cm_frame_busaddr;
+ cmd_tmp.cm_dmamap = cm->cm_dmamap;
+
cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_tb_init);
cm->cm_frame_busaddr = sc->mfi_tb_init_busaddr;
cm->cm_dmamap = sc->mfi_tb_init_dmamap;
cm->cm_frame->header.context = 0;
- cm->cm_sc = sc;
- cm->cm_index = 0;
/*
* Abuse the SG list area of the frame to hold the init_qinfo
@@ -358,6 +372,7 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
*/
mfi_init = &cm->cm_frame->init;
+ mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc;
bzero(mpi2IocInit, sizeof(struct MPI2_IOC_INIT_REQUEST));
mpi2IocInit->Function = MPI2_FUNCTION_IOC_INIT;
mpi2IocInit->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
@@ -411,23 +426,25 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc)
if ((error = mfi_mapcmd(sc, cm)) != 0) {
device_printf(sc->mfi_dev, "failed to send IOC init2 "
"command %d at %lx\n", error, (long)cm->cm_frame_busaddr);
- mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
- return (error);
+ goto out;
}
- mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
- if (mfi_init->header.cmd_status == 0) {
+ if (mfi_init->header.cmd_status == MFI_STAT_OK) {
sc->MFA_enabled = 1;
- }
- else {
- device_printf(sc->mfi_dev, "Init command Failed %x\n",
+ } else {
+ device_printf(sc->mfi_dev, "Init command Failed %#x\n",
mfi_init->header.cmd_status);
- return 1;
+ error = mfi_init->header.cmd_status;
+ goto out;
}
- return 0;
+out:
+ cm->cm_frame = cmd_tmp.cm_frame;
+ cm->cm_frame_busaddr = cmd_tmp.cm_frame_busaddr;
+ cm->cm_dmamap = cmd_tmp.cm_dmamap;
+ mfi_release_command(cm);
+
+ return (error);
}
@@ -447,13 +464,21 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc)
sc->request_desc_pool = malloc(sizeof(
union mfi_mpi2_request_descriptor) * sc->mfi_max_fw_cmds,
M_MFIBUF, M_NOWAIT|M_ZERO);
+
+ if (sc->request_desc_pool == NULL) {
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "memory for request_desc_pool\n");
+ return (ENOMEM);
+ }
+
sc->mfi_cmd_pool_tbolt = malloc(sizeof(struct mfi_cmd_tbolt*)
* sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO);
- if (!sc->mfi_cmd_pool_tbolt) {
- device_printf(sc->mfi_dev, "out of memory. Could not alloc "
- "memory for cmd_list_fusion\n");
- return 1;
+ if (sc->mfi_cmd_pool_tbolt == NULL) {
+ free(sc->request_desc_pool, M_MFIBUF);
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "memory for cmd_pool_tbolt\n");
+ return (ENOMEM);
}
for (i = 0; i < sc->mfi_max_fw_cmds; i++) {
@@ -461,20 +486,24 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc)
struct mfi_cmd_tbolt),M_MFIBUF, M_NOWAIT|M_ZERO);
if (!sc->mfi_cmd_pool_tbolt[i]) {
- device_printf(sc->mfi_dev, "Could not alloc cmd list "
- "fusion\n");
+ device_printf(sc->mfi_dev, "Could not alloc "
+ "cmd_pool_tbolt entry\n");
for (j = 0; j < i; j++)
free(sc->mfi_cmd_pool_tbolt[j], M_MFIBUF);
+ free(sc->request_desc_pool, M_MFIBUF);
+ sc->request_desc_pool = NULL;
free(sc->mfi_cmd_pool_tbolt, M_MFIBUF);
sc->mfi_cmd_pool_tbolt = NULL;
+
+ return (ENOMEM);
}
}
/*
* The first 256 bytes (SMID 0) is not used. Don't add to the cmd
- *list
+ * list
*/
io_req_base = sc->request_message_pool_align
+ MEGASAS_THUNDERBOLT_NEW_MSG_SIZE;
@@ -520,7 +549,8 @@ mfi_tbolt_reset(struct mfi_softc *sc)
if (sc->mfi_flags & MFI_FLAGS_TBOLT) {
fw_state = sc->mfi_read_fw_status(sc);
- if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT) {
+ if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT ||
+ mfi_fw_reset_test) {
if ((sc->disableOnlineCtrlReset == 0)
&& (sc->adpreset == 0)) {
device_printf(sc->mfi_dev, "Adapter RESET "
@@ -554,8 +584,7 @@ mfi_intr_tbolt(void *arg)
return;
mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_complete_cmd(sc);
- if (sc->mfi_flags & MFI_FLAGS_QFRZN)
- sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
+ sc->mfi_flags &= ~MFI_FLAGS_QFRZN;
mfi_startio(sc);
mtx_unlock(&sc->mfi_io_lock);
return;
@@ -573,58 +602,63 @@ map_tbolt_cmd_status(struct mfi_command *mfi_cmd, uint8_t status,
uint8_t ext_status)
{
switch (status) {
- case MFI_STAT_OK:
- mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK;
- mfi_cmd->cm_error = MFI_STAT_OK;
- break;
-
- case MFI_STAT_SCSI_IO_FAILED:
- case MFI_STAT_LD_INIT_IN_PROGRESS:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->header.scsi_status = ext_status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.scsi_status
- = ext_status;
- break;
-
- case MFI_STAT_SCSI_DONE_WITH_ERROR:
- mfi_cmd->cm_frame->header.cmd_status = ext_status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status;
- break;
-
- case MFI_STAT_LD_OFFLINE:
- case MFI_STAT_DEVICE_NOT_FOUND:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- break;
-
- default:
- mfi_cmd->cm_frame->header.cmd_status = status;
- mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
- break;
- }
+ case MFI_STAT_OK:
+ mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK;
+ mfi_cmd->cm_error = MFI_STAT_OK;
+ break;
+
+ case MFI_STAT_SCSI_IO_FAILED:
+ case MFI_STAT_LD_INIT_IN_PROGRESS:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->header.scsi_status = ext_status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.scsi_status
+ = ext_status;
+ break;
+
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+ mfi_cmd->cm_frame->header.cmd_status = ext_status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status;
+ break;
+
+ case MFI_STAT_LD_OFFLINE:
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ break;
+
+ default:
+ mfi_cmd->cm_frame->header.cmd_status = status;
+ mfi_cmd->cm_frame->dcmd.header.cmd_status = status;
+ break;
+ }
}
/*
* mfi_tbolt_return_cmd - Return a cmd to free command pool
* @instance: Adapter soft state
- * @cmd: Command packet to be returned to free command pool
+ * @tbolt_cmd: Tbolt command packet to be returned to free command pool
+ * @mfi_cmd: Owning MFI command packet
*/
-static inline void
-mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *cmd)
+void
+mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *tbolt_cmd,
+ struct mfi_command *mfi_cmd)
{
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- cmd->sync_cmd_idx = sc->mfi_max_fw_cmds;
- TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, cmd, next);
+ mfi_cmd->cm_flags &= ~MFI_CMD_TBOLT;
+ mfi_cmd->cm_extra_frames = 0;
+ tbolt_cmd->sync_cmd_idx = sc->mfi_max_fw_cmds;
+
+ TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, tbolt_cmd, next);
}
void
mfi_tbolt_complete_cmd(struct mfi_softc *sc)
{
struct mfi_mpi2_reply_header *desc, *reply_desc;
- struct mfi_command *cmd_mfi, *cmd_mfi_check; /* For MFA Cmds */
+ struct mfi_command *cmd_mfi; /* For MFA Cmds */
struct mfi_cmd_tbolt *cmd_tbolt;
uint16_t smid;
uint8_t reply_descript_type;
@@ -632,14 +666,17 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
uint32_t status, extStatus;
uint16_t num_completed;
union desc_value val;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
desc = (struct mfi_mpi2_reply_header *)
((uintptr_t)sc->reply_frame_pool_align
+ sc->last_reply_idx * sc->reply_size);
reply_desc = desc;
- if (!reply_desc)
+ if (reply_desc == NULL) {
device_printf(sc->mfi_dev, "reply desc is NULL!!\n");
+ return;
+ }
reply_descript_type = reply_desc->ReplyFlags
& MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
@@ -652,13 +689,18 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
/* Read Reply descriptor */
while ((val.u.low != 0xFFFFFFFF) && (val.u.high != 0xFFFFFFFF)) {
smid = reply_desc->SMID;
- if (!smid || smid > sc->mfi_max_fw_cmds + 1) {
- device_printf(sc->mfi_dev, "smid is %x. Cannot "
- "proceed. Returning \n", smid);
- return;
+ if (smid == 0 || smid > sc->mfi_max_fw_cmds) {
+ device_printf(sc->mfi_dev, "smid is %d cannot "
+ "proceed - skipping\n", smid);
+ goto next;
}
-
cmd_tbolt = sc->mfi_cmd_pool_tbolt[smid - 1];
+ if (cmd_tbolt->sync_cmd_idx == sc->mfi_max_fw_cmds) {
+ device_printf(sc->mfi_dev, "cmd_tbolt %p "
+ "has invalid sync_cmd_idx=%d - skipping\n",
+ cmd_tbolt, cmd_tbolt->sync_cmd_idx);
+ goto next;
+ }
cmd_mfi = &sc->mfi_commands[cmd_tbolt->sync_cmd_idx];
scsi_io_req = cmd_tbolt->io_request;
@@ -666,33 +708,30 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
extStatus = cmd_mfi->cm_frame->dcmd.header.scsi_status;
map_tbolt_cmd_status(cmd_mfi, status, extStatus);
- if (cmd_mfi->cm_flags & MFI_CMD_SCSI &&
+ /* mfi_tbolt_return_cmd is handled by mfi complete / return */
+ if ((cmd_mfi->cm_flags & MFI_CMD_SCSI) != 0 &&
(cmd_mfi->cm_flags & MFI_CMD_POLLED) != 0) {
/* polled LD/SYSPD IO command */
- mfi_tbolt_return_cmd(sc, cmd_tbolt);
/* XXX mark okay for now DJA */
cmd_mfi->cm_frame->header.cmd_status = MFI_STAT_OK;
- } else {
+ } else {
/* remove command from busy queue if not polled */
- TAILQ_FOREACH(cmd_mfi_check, &sc->mfi_busy, cm_link) {
- if (cmd_mfi_check == cmd_mfi) {
- mfi_remove_busy(cmd_mfi);
- break;
- }
- }
+ if ((cmd_mfi->cm_flags & MFI_ON_MFIQ_BUSY) != 0)
+ mfi_remove_busy(cmd_mfi);
/* complete the command */
mfi_complete(sc, cmd_mfi);
- mfi_tbolt_return_cmd(sc, cmd_tbolt);
}
+next:
sc->last_reply_idx++;
if (sc->last_reply_idx >= sc->mfi_max_fw_cmds) {
MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
sc->last_reply_idx = 0;
}
- /*set it back to all 0xfff.*/
+
+ /* Set it back to all 0xfff */
((union mfi_mpi2_reply_descriptor*)desc)->words =
~((uint64_t)0x00);
@@ -728,17 +767,23 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc)
*/
struct mfi_cmd_tbolt *
-mfi_tbolt_get_cmd(struct mfi_softc *sc)
+mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
{
struct mfi_cmd_tbolt *cmd = NULL;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
- cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh);
+ if ((cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh)) == NULL)
+ return (NULL);
TAILQ_REMOVE(&sc->mfi_cmd_tbolt_tqh, cmd, next);
memset((uint8_t *)cmd->sg_frame, 0, MEGASAS_MAX_SZ_CHAIN_FRAME);
memset((uint8_t *)cmd->io_request, 0,
MEGASAS_THUNDERBOLT_NEW_MSG_SIZE);
+
+ cmd->sync_cmd_idx = mfi_cmd->cm_index;
+ mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */
+ mfi_cmd->cm_flags |= MFI_CMD_TBOLT;
+
return cmd;
}
@@ -767,11 +812,9 @@ mfi_build_mpt_pass_thru(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
struct mfi_mpi2_request_raid_scsi_io *io_req;
struct mfi_cmd_tbolt *cmd;
- cmd = mfi_tbolt_get_cmd(sc);
+ cmd = mfi_tbolt_get_cmd(sc, mfi_cmd);
if (!cmd)
return EBUSY;
- mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */
- cmd->sync_cmd_idx = mfi_cmd->cm_index;
io_req = cmd->io_request;
mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL.IeeeChain;
@@ -980,16 +1023,21 @@ mfi_build_and_issue_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd)
struct mfi_cmd_tbolt *cmd;
union mfi_mpi2_request_descriptor *req_desc = NULL;
uint16_t index;
- cmd = mfi_tbolt_get_cmd(sc);
- if (!cmd)
- return NULL;
- mfi_cmd->cm_extra_frames = cmd->index;
- cmd->sync_cmd_idx = mfi_cmd->cm_index;
+ cmd = mfi_tbolt_get_cmd(sc, mfi_cmd);
+ if (cmd == NULL)
+ return (NULL);
index = cmd->index;
req_desc = mfi_tbolt_get_request_descriptor(sc, index-1);
- if (mfi_tbolt_build_io(sc, mfi_cmd, cmd))
- return NULL;
+ if (req_desc == NULL) {
+ mfi_tbolt_return_cmd(sc, cmd, mfi_cmd);
+ return (NULL);
+ }
+
+ if (mfi_tbolt_build_io(sc, mfi_cmd, cmd) != 0) {
+ mfi_tbolt_return_cmd(sc, cmd, mfi_cmd);
+ return (NULL);
+ }
req_desc->header.SMID = index;
return req_desc;
}
@@ -1008,7 +1056,7 @@ mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd)
index = cmd->cm_extra_frames;
req_desc = mfi_tbolt_get_request_descriptor(sc, index - 1);
- if (!req_desc)
+ if (req_desc == NULL)
return NULL;
bzero(req_desc, sizeof(*req_desc));
@@ -1024,7 +1072,7 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
struct mfi_frame_header *hdr;
uint8_t *cdb;
union mfi_mpi2_request_descriptor *req_desc = NULL;
- int tm = MFI_POLL_TIMEOUT_SECS * 1000;
+ int tm = mfi_polled_cmd_timeout * 1000;
hdr = &cm->cm_frame->header;
cdb = cm->cm_frame->pass.cdb;
@@ -1058,9 +1106,8 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
return 1;
}
} else if ((req_desc = mfi_tbolt_build_mpt_cmd(sc, cm)) == NULL) {
- device_printf(sc->mfi_dev, "Mapping from MFI to MPT "
- "Failed\n");
- return 1;
+ device_printf(sc->mfi_dev, "Mapping from MFI to MPT Failed\n");
+ return (1);
}
if (cm->cm_flags & MFI_CMD_SCSI) {
@@ -1078,23 +1125,30 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
if ((cm->cm_flags & MFI_CMD_POLLED) == 0)
return 0;
- if (cm->cm_flags & MFI_CMD_SCSI) {
- /* check reply queue */
- mfi_tbolt_complete_cmd(sc);
- }
-
- /* This is a polled command, so busy-wait for it to complete. */
+ /*
+ * This is a polled command, so busy-wait for it to complete.
+ *
+ * The value of hdr->cmd_status is updated directly by the hardware
+	 * so there is no guarantee that mfi_tbolt_complete_cmd is called
+ * prior to this value changing.
+ */
while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
DELAY(1000);
tm -= 1;
if (tm <= 0)
break;
if (cm->cm_flags & MFI_CMD_SCSI) {
- /* check reply queue */
+ /*
+			 * Force a reply queue check.  This ensures
+			 * that crash dumps complete correctly.
+ */
mfi_tbolt_complete_cmd(sc);
}
}
+ /* ensure the command cleanup has been processed before returning */
+ mfi_tbolt_complete_cmd(sc);
+
if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) {
device_printf(sc->mfi_dev, "Frame %p timed out "
"command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode);
@@ -1104,9 +1158,10 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm)
}
static void
-mfi_issue_pending_cmds_again (struct mfi_softc *sc)
+mfi_issue_pending_cmds_again(struct mfi_softc *sc)
{
struct mfi_command *cm, *tmp;
+ struct mfi_cmd_tbolt *cmd;
mtx_assert(&sc->mfi_io_lock, MA_OWNED);
TAILQ_FOREACH_REVERSE_SAFE(cm, &sc->mfi_busy, BUSYQ, cm_link, tmp) {
@@ -1119,50 +1174,51 @@ mfi_issue_pending_cmds_again (struct mfi_softc *sc)
* should be performed on the controller
*/
if (cm->retry_for_fw_reset == 3) {
- device_printf(sc->mfi_dev, "megaraid_sas: command %d "
- "was tried multiple times during adapter reset"
- "Shutting down the HBA\n", cm->cm_index);
+ device_printf(sc->mfi_dev, "megaraid_sas: command %p "
+ "index=%d was tried multiple times during adapter "
+ "reset - Shutting down the HBA\n", cm, cm->cm_index);
mfi_kill_hba(sc);
sc->hw_crit_error = 1;
return;
}
- if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) {
- struct mfi_cmd_tbolt *cmd;
- mfi_remove_busy(cm);
- cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames -
- 1 ];
- mfi_tbolt_return_cmd(sc, cmd);
- if ((cm->cm_flags & MFI_ON_MFIQ_MASK) == 0) {
- if (cm->cm_frame->dcmd.opcode !=
- MFI_DCMD_CTRL_EVENT_WAIT) {
- device_printf(sc->mfi_dev,
- "APJ ****requeue command %d \n",
- cm->cm_index);
- mfi_requeue_ready(cm);
- }
+ mfi_remove_busy(cm);
+ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) {
+ if (cm->cm_extra_frames != 0 && cm->cm_extra_frames <=
+ sc->mfi_max_fw_cmds) {
+ cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1];
+ mfi_tbolt_return_cmd(sc, cmd, cm);
+ } else {
+ device_printf(sc->mfi_dev,
+ "Invalid extra_frames: %d detected\n",
+ cm->cm_extra_frames);
}
- else
- mfi_release_command(cm);
}
+
+ if (cm->cm_frame->dcmd.opcode != MFI_DCMD_CTRL_EVENT_WAIT) {
+ device_printf(sc->mfi_dev,
+ "APJ ****requeue command %p index=%d\n",
+ cm, cm->cm_index);
+ mfi_requeue_ready(cm);
+ } else
+ mfi_release_command(cm);
}
mfi_startio(sc);
}
static void
-mfi_kill_hba (struct mfi_softc *sc)
+mfi_kill_hba(struct mfi_softc *sc)
{
if (sc->mfi_flags & MFI_FLAGS_TBOLT)
- MFI_WRITE4 (sc, 0x00,MFI_STOP_ADP);
+ MFI_WRITE4(sc, 0x00, MFI_STOP_ADP);
else
- MFI_WRITE4 (sc, MFI_IDB,MFI_STOP_ADP);
+ MFI_WRITE4(sc, MFI_IDB, MFI_STOP_ADP);
}
static void
mfi_process_fw_state_chg_isr(void *arg)
{
struct mfi_softc *sc= (struct mfi_softc *)arg;
- struct mfi_cmd_tbolt *cmd;
int error, status;
if (sc->adpreset == 1) {
@@ -1191,26 +1247,32 @@ mfi_process_fw_state_chg_isr(void *arg)
device_printf(sc->mfi_dev, "controller is not in "
"ready state\n");
mfi_kill_hba(sc);
- sc->hw_crit_error= 1;
- return ;
+ sc->hw_crit_error = 1;
+ return;
+ }
+ if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) {
+ device_printf(sc->mfi_dev, "Failed to initialise MFI "
+ "queue\n");
+ mfi_kill_hba(sc);
+ sc->hw_crit_error = 1;
+ return;
}
- if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0)
- return;
- mtx_lock(&sc->mfi_io_lock);
+ /* Init last reply index and max */
+ MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1);
+ MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx);
sc->mfi_enable_intr(sc);
sc->adpreset = 0;
- free(sc->mfi_aen_cm->cm_data, M_MFIBUF);
- mfi_remove_busy(sc->mfi_aen_cm);
- cmd = sc->mfi_cmd_pool_tbolt[sc->mfi_aen_cm->cm_extra_frames
- - 1];
- mfi_tbolt_return_cmd(sc, cmd);
- if (sc->mfi_aen_cm) {
+ if (sc->mfi_aen_cm != NULL) {
+ free(sc->mfi_aen_cm->cm_data, M_MFIBUF);
+ mfi_remove_busy(sc->mfi_aen_cm);
mfi_release_command(sc->mfi_aen_cm);
sc->mfi_aen_cm = NULL;
}
- if (sc->mfi_map_sync_cm) {
+
+ if (sc->mfi_map_sync_cm != NULL) {
+ mfi_remove_busy(sc->mfi_map_sync_cm);
mfi_release_command(sc->mfi_map_sync_cm);
sc->mfi_map_sync_cm = NULL;
}
@@ -1223,9 +1285,12 @@ mfi_process_fw_state_chg_isr(void *arg)
*/
if (!sc->hw_crit_error) {
/*
- * Initiate AEN (Asynchronous Event Notification)
+ * Initiate AEN (Asynchronous Event Notification) &
+ * Sync Map
*/
mfi_aen_setup(sc, sc->last_seq_num);
+ mfi_tbolt_sync_map_info(sc);
+
sc->issuepend_done = 1;
device_printf(sc->mfi_dev, "second stage of reset "
"complete, FW is ready now.\n");
@@ -1237,7 +1302,6 @@ mfi_process_fw_state_chg_isr(void *arg)
device_printf(sc->mfi_dev, "mfi_process_fw_state_chg_isr "
"called with unhandled value:%d\n", sc->adpreset);
}
- mtx_unlock(&sc->mfi_io_lock);
}
/*
@@ -1276,25 +1340,27 @@ void
mfi_tbolt_sync_map_info(struct mfi_softc *sc)
{
int error = 0, i;
- struct mfi_command *cmd;
- struct mfi_dcmd_frame *dcmd;
+ struct mfi_command *cmd = NULL;
+ struct mfi_dcmd_frame *dcmd = NULL;
uint32_t context = 0;
- union mfi_ld_ref *ld_sync;
+ union mfi_ld_ref *ld_sync = NULL;
size_t ld_size;
struct mfi_frame_header *hdr;
struct mfi_command *cm = NULL;
struct mfi_ld_list *list = NULL;
+ mtx_assert(&sc->mfi_io_lock, MA_OWNED);
+
if (sc->mfi_map_sync_cm != NULL || sc->cm_map_abort)
return;
- mtx_lock(&sc->mfi_io_lock);
error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST,
(void **)&list, sizeof(*list));
if (error)
goto out;
cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAIN;
+
if (mfi_wait_command(sc, cm) != 0) {
device_printf(sc->mfi_dev, "Failed to get device listing\n");
goto out;
@@ -1308,18 +1374,15 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc)
}
ld_size = sizeof(*ld_sync) * list->ld_count;
- mtx_unlock(&sc->mfi_io_lock);
ld_sync = (union mfi_ld_ref *) malloc(ld_size, M_MFIBUF,
- M_WAITOK | M_ZERO);
+ M_NOWAIT | M_ZERO);
if (ld_sync == NULL) {
device_printf(sc->mfi_dev, "Failed to allocate sync\n");
goto out;
}
- for (i = 0; i < list->ld_count; i++) {
+ for (i = 0; i < list->ld_count; i++)
ld_sync[i].ref = list->ld_list[i].ld.ref;
- }
- mtx_lock(&sc->mfi_io_lock);
if ((cmd = mfi_dequeue_free(sc)) == NULL) {
device_printf(sc->mfi_dev, "Failed to get command\n");
free(ld_sync, M_MFIBUF);
@@ -1355,7 +1418,7 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc)
device_printf(sc->mfi_dev, "failed to send map sync\n");
free(ld_sync, M_MFIBUF);
sc->mfi_map_sync_cm = NULL;
- mfi_requeue_ready(cmd);
+ mfi_release_command(cmd);
goto out;
}
@@ -1364,7 +1427,6 @@ out:
free(list, M_MFIBUF);
if (cm)
mfi_release_command(cm);
- mtx_unlock(&sc->mfi_io_lock);
}
static void
@@ -1389,14 +1451,13 @@ mfi_sync_map_complete(struct mfi_command *cm)
}
free(cm->cm_data, M_MFIBUF);
- sc->mfi_map_sync_cm = NULL;
wakeup(&sc->mfi_map_sync_cm);
+ sc->mfi_map_sync_cm = NULL;
mfi_release_command(cm);
/* set it up again so the driver can catch more events */
- if (!aborted) {
+ if (!aborted)
mfi_queue_map_sync(sc);
- }
}
static void
@@ -1412,5 +1473,7 @@ mfi_handle_map_sync(void *context, int pending)
struct mfi_softc *sc;
sc = context;
+ mtx_lock(&sc->mfi_io_lock);
mfi_tbolt_sync_map_info(sc);
+ mtx_unlock(&sc->mfi_io_lock);
}
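
A recurring theme in the mfi_tbolt.c changes is the polled-command path: the hardware updates hdr->cmd_status directly, so the driver busy-waits, drains the reply queue on every pass (needed for crash dumps), and now performs one final mfi_tbolt_complete_cmd() after the wait so command cleanup is never skipped. A simplified userspace model of that control flow, with a flag standing in for the hardware status (assumptions throughout, not driver code):

#include <stdio.h>
#include <unistd.h>

#define STAT_INVALID	0xff
#define STAT_OK		0x00

static volatile unsigned char cmd_status = STAT_INVALID;
static int hw_done_after = 3;	/* pretend completion after 3 polls */

static void
complete_cmds(void)
{
	/* Drain the reply queue; the fake hardware completes eventually. */
	if (hw_done_after > 0 && --hw_done_after == 0)
		cmd_status = STAT_OK;
}

static int
poll_cmd(int timeout_ms)
{
	while (cmd_status == STAT_INVALID) {
		usleep(1000);		/* DELAY(1000) in the driver */
		if (--timeout_ms <= 0)
			break;
		complete_cmds();	/* force a reply-queue check each pass */
	}
	complete_cmds();	/* final drain so cleanup is never skipped */
	return (cmd_status == STAT_INVALID ? -1 : 0);
}

int
main(void)
{
	printf("poll result: %d\n", poll_cmd(60 * 1000));
	return (0);
}
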
diff --git a/sys/dev/mfi/mfireg.h b/sys/dev/mfi/mfireg.h
index dab9cf7..52ddafe 100644
--- a/sys/dev/mfi/mfireg.h
+++ b/sys/dev/mfi/mfireg.h
@@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$");
* ThunderBolt specific Register
*/
+#define MFI_RFPI 0x48 /* reply_free_post_host_index */
#define MFI_RPI 0x6c /* reply_post_host_index */
#define MFI_ILQP 0xc0 /* inbound_low_queue_port */
#define MFI_IHQP 0xc4 /* inbound_high_queue_port */
@@ -259,6 +260,13 @@ typedef enum {
#define MFI_FRAME_DIR_READ 0x0010
#define MFI_FRAME_DIR_BOTH 0x0018
#define MFI_FRAME_IEEE_SGL 0x0020
+#define MFI_FRAME_FMT "\20" \
+ "\1NOPOST" \
+ "\2SGL64" \
+ "\3SENSE64" \
+ "\4WRITE" \
+ "\5READ" \
+ "\6IEEESGL"
/* ThunderBolt Specific */
@@ -456,8 +464,8 @@ typedef enum {
#define MFI_FRAME_SIZE 64
#define MFI_MBOX_SIZE 12
-/* Firmware flashing can take 40s */
-#define MFI_POLL_TIMEOUT_SECS 50
+/* Firmware flashing can take 50+ seconds */
+#define MFI_POLL_TIMEOUT_SECS 60
/* Allow for speedier math calculations */
#define MFI_SECTOR_LEN 512
diff --git a/sys/dev/mfi/mfivar.h b/sys/dev/mfi/mfivar.h
index bb2a324..664ede9 100644
--- a/sys/dev/mfi/mfivar.h
+++ b/sys/dev/mfi/mfivar.h
@@ -102,12 +102,25 @@ struct mfi_command {
#define MFI_CMD_DATAOUT (1<<2)
#define MFI_CMD_COMPLETED (1<<3)
#define MFI_CMD_POLLED (1<<4)
-#define MFI_ON_MFIQ_FREE (1<<5)
-#define MFI_ON_MFIQ_READY (1<<6)
-#define MFI_ON_MFIQ_BUSY (1<<7)
-#define MFI_ON_MFIQ_MASK ((1<<5)|(1<<6)|(1<<7))
-#define MFI_CMD_SCSI (1<<8)
-#define MFI_CMD_CCB (1<<9)
+#define MFI_CMD_SCSI (1<<5)
+#define MFI_CMD_CCB (1<<6)
+#define MFI_CMD_TBOLT (1<<7)
+#define MFI_ON_MFIQ_FREE (1<<8)
+#define MFI_ON_MFIQ_READY (1<<9)
+#define MFI_ON_MFIQ_BUSY (1<<10)
+#define MFI_ON_MFIQ_MASK (MFI_ON_MFIQ_FREE | MFI_ON_MFIQ_READY| \
+ MFI_ON_MFIQ_BUSY)
+#define MFI_CMD_FLAGS_FMT "\20" \
+ "\1MAPPED" \
+ "\2DATAIN" \
+ "\3DATAOUT" \
+ "\4COMPLETED" \
+ "\5POLLED" \
+ "\6SCSI" \
+ "\7TBOLT" \
+ "\10Q_FREE" \
+ "\11Q_READY" \
+ "\12Q_BUSY"
uint8_t retry_for_fw_reset;
void (* cm_complete)(struct mfi_command *cm);
void *cm_private;
@@ -268,10 +281,6 @@ struct mfi_softc {
*/
struct mfi_command *mfi_commands;
/*
- * How many commands were actually allocated
- */
- int mfi_total_cmds;
- /*
* How many commands the firmware can handle. Also how big the reply
* queue is, minus 1.
*/
@@ -470,9 +479,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_enqueue_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
+ panic("command %p is on another queue, " \
"flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
} \
TAILQ_INSERT_TAIL(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags |= MFI_ON_ ## index; \
@@ -482,9 +490,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_requeue_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \
- printf("command %p is on another queue, " \
+ panic("command %p is on another queue, " \
"flags = %#x\n", cm, cm->cm_flags); \
- panic("command is on another queue"); \
} \
TAILQ_INSERT_HEAD(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags |= MFI_ON_ ## index; \
@@ -497,10 +504,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
\
if ((cm = TAILQ_FIRST(&sc->mfi_ ## name)) != NULL) { \
if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \
- printf("command %p not in queue, " \
+ panic("command %p not in queue, " \
"flags = %#x, bit = %#x\n", cm, \
cm->cm_flags, MFI_ON_ ## index); \
- panic("command not in queue"); \
} \
TAILQ_REMOVE(&sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags &= ~MFI_ON_ ## index; \
@@ -512,10 +518,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *);
mfi_remove_ ## name (struct mfi_command *cm) \
{ \
if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \
- printf("command %p not in queue, flags = %#x, " \
+ panic("command %p not in queue, flags = %#x, " \
"bit = %#x\n", cm, cm->cm_flags, \
MFI_ON_ ## index); \
- panic("command not in queue"); \
} \
TAILQ_REMOVE(&cm->cm_sc->mfi_ ## name, cm, cm_link); \
cm->cm_flags &= ~MFI_ON_ ## index; \
@@ -608,7 +613,8 @@ SYSCTL_DECL(_hw_mfi);
#ifdef MFI_DEBUG
extern void mfi_print_cmd(struct mfi_command *cm);
extern void mfi_dump_cmds(struct mfi_softc *sc);
-extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char *, int );
+extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *,
+ const char *, int);
#define MFI_PRINT_CMD(cm) mfi_print_cmd(cm)
#define MFI_DUMP_CMDS(sc) mfi_dump_cmds(sc)
#define MFI_VALIDATE_CMD(sc, cm) mfi_validate_sg(sc, cm, __FUNCTION__, __LINE__)
@@ -618,6 +624,8 @@ extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char
#define MFI_VALIDATE_CMD(sc, cm)
#endif
-extern void mfi_release_command(struct mfi_command *cm);
+extern void mfi_release_command(struct mfi_command *);
+extern void mfi_tbolt_return_cmd(struct mfi_softc *,
+ struct mfi_cmd_tbolt *, struct mfi_command *);
#endif /* _MFIVAR_H */
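
The mfivar.h hunks renumber the command flags so the queue-membership bits sit together under MFI_ON_MFIQ_MASK, add %b format strings for them, and fold each queue-corruption printf+panic pair into a single panic() that carries the details. The invariant the macros enforce is simple: a command's flags record which queue it is on, and enqueueing a command that is already on a queue is a fatal bug. A compact userspace sketch of that invariant, with errx() standing in for panic() and hypothetical names:

#include <sys/queue.h>
#include <err.h>
#include <stdio.h>

#define ON_MFIQ_FREE	0x1
#define ON_MFIQ_READY	0x2
#define ON_MFIQ_MASK	(ON_MFIQ_FREE | ON_MFIQ_READY)

struct cmd {
	int flags;
	TAILQ_ENTRY(cmd) link;
};

TAILQ_HEAD(cmdq, cmd);

static void
enqueue_ready(struct cmdq *q, struct cmd *cm)
{
	if ((cm->flags & ON_MFIQ_MASK) != 0)
		errx(1, "command %p is on another queue, flags = %#x",
		    (void *)cm, cm->flags);	/* panic() in the kernel */
	TAILQ_INSERT_TAIL(q, cm, link);
	cm->flags |= ON_MFIQ_READY;
}

int
main(void)
{
	struct cmdq q = TAILQ_HEAD_INITIALIZER(q);
	struct cmd c = { .flags = 0 };

	enqueue_ready(&q, &c);
	printf("flags after enqueue: %#x\n", c.flags);
	return (0);
}
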
diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c
index d0ca808..664575c 100644
--- a/sys/dev/msk/if_msk.c
+++ b/sys/dev/msk/if_msk.c
@@ -1695,6 +1695,12 @@ msk_attach(device_t dev)
ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
}
ifp->if_capenable = ifp->if_capabilities;
+ /*
+	 * Disable RX checksum offloading on controllers that don't use
+	 * the new descriptor format, but give users a chance to enable it.
+ */
+ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0)
+ ifp->if_capenable &= ~IFCAP_RXCSUM;
/*
* Tell the upper layer(s) we support long frames.
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 66da0d0..6d110ab 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1269,6 +1269,15 @@ brelse(struct buf *bp)
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+ if (BUF_LOCKRECURSED(bp)) {
+ /*
+ * Do not process, in particular, do not handle the
+ * B_INVAL/B_RELBUF and do not release to free list.
+ */
+ BUF_UNLOCK(bp);
+ return;
+ }
+
if (bp->b_flags & B_MANAGED) {
bqrelse(bp);
return;
@@ -1445,12 +1454,6 @@ brelse(struct buf *bp)
brelvp(bp);
}
- if (BUF_LOCKRECURSED(bp)) {
- /* do not release to free list */
- BUF_UNLOCK(bp);
- return;
- }
-
/* enqueue */
mtx_lock(&bqlock);
/* Handle delayed bremfree() processing. */
@@ -2682,6 +2685,9 @@ loop:
/* We timed out or were interrupted. */
else if (error)
return (NULL);
+ /* If recursed, assume caller knows the rules. */
+ else if (BUF_LOCKRECURSED(bp))
+ goto end;
/*
* The buffer is locked. B_CACHE is cleared if the buffer is
@@ -2865,6 +2871,7 @@ loop:
}
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
BUF_ASSERT_HELD(bp);
+end:
KASSERT(bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
return (bp);
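
The vfs_bio.c change moves the BUF_LOCKRECURSED() test to the top of brelse(): on a recursed lock the function must only drop one lock level and must not run the B_INVAL/B_RELBUF handling or requeue the buffer, since an outer caller still owns it. The shape of that rule, as a small hypothetical sketch with an explicit recursion counter:

#include <stdio.h>

struct buf {
	int lock_depth;		/* 1 = held once, >1 = recursed */
	int on_freelist;
};

static void
brelse_sketch(struct buf *bp)
{
	if (bp->lock_depth > 1) {
		/* Recursed: drop one level, do no other processing. */
		bp->lock_depth--;
		return;
	}
	/* Outermost release: now it is safe to requeue the buffer. */
	bp->lock_depth = 0;
	bp->on_freelist = 1;
}

int
main(void)
{
	struct buf b = { 2, 0 };	/* locked recursively */

	brelse_sketch(&b);		/* inner release: stays off freelist */
	printf("after inner release: on_freelist=%d\n", b.on_freelist);
	brelse_sketch(&b);		/* outer release: requeued */
	printf("after outer release: on_freelist=%d\n", b.on_freelist);
	return (0);
}
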
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index b54dc04..0696edd 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -385,6 +385,7 @@ extern int vttoif_tab[];
#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
#define FORCECLOSE 0x0002 /* vflush: force file closure */
#define WRITECLOSE 0x0004 /* vflush: only close writable files */
+#define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
#define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */
#define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index abe4073..789a7cf 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1790,6 +1790,17 @@ fail:
return (0);
}
+static inline struct buf *
+getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags)
+{
+ struct fs *fs;
+
+ fs = ip->i_fs;
+ return (getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs,
+ cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0,
+ gbflags));
+}
+
/*
* Determine whether an inode can be allocated.
*
@@ -1814,9 +1825,11 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
u_int8_t *inosused, *loc;
struct ufs2_dinode *dp2;
int error, start, len, i;
+ u_int32_t old_initediblk;
fs = ip->i_fs;
ump = ip->i_ump;
+check_nifree:
if (fs->fs_cs(fs, cg).cs_nifree == 0)
return (0);
UFS_UNLOCK(ump);
@@ -1828,13 +1841,13 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
return (0);
}
cgp = (struct cg *)bp->b_data;
+restart:
if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
brelse(bp);
UFS_LOCK(ump);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
- cgp->cg_old_time = cgp->cg_time = time_second;
inosused = cg_inosused(cgp);
if (ipref) {
ipref %= fs->fs_ipg;
@@ -1856,7 +1869,6 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
}
}
ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
- cgp->cg_irotor = ipref;
gotit:
/*
* Check to see if we need to initialize more inodes.
@@ -1864,9 +1876,37 @@ gotit:
if (fs->fs_magic == FS_UFS2_MAGIC &&
ipref + INOPB(fs) > cgp->cg_initediblk &&
cgp->cg_initediblk < cgp->cg_niblk) {
- ibp = getblk(ip->i_devvp, fsbtodb(fs,
- ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
- (int)fs->fs_bsize, 0, 0, 0);
+ old_initediblk = cgp->cg_initediblk;
+
+ /*
+		 * Release the cylinder group lock before writing the
+		 * initialized inode block.  Entering
+		 * babarrierwrite() with the cylinder group lock held
+		 * causes a lock order violation between the lock and
+		 * snaplk.
+		 *
+		 * Another thread can decide to initialize the same
+		 * inode block, but whichever thread first gets the
+		 * cylinder group lock after writing the newly
+		 * allocated inode block will update it; the other
+		 * will see that it lost the race and leave the
+		 * cylinder group unchanged.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT);
+ brelse(bp);
+ if (ibp == NULL) {
+ /*
+ * The inode block buffer is already owned by
+ * another thread, which must initialize it.
+ * Wait on the buffer to allow another thread
+ * to finish the updates, with dropped cg
+ * buffer lock, then retry.
+ */
+ ibp = getinobuf(ip, cg, old_initediblk, 0);
+ brelse(ibp);
+ UFS_LOCK(ump);
+ goto check_nifree;
+ }
bzero(ibp->b_data, (int)fs->fs_bsize);
dp2 = (struct ufs2_dinode *)(ibp->b_data);
for (i = 0; i < INOPB(fs); i++) {
@@ -1883,8 +1923,29 @@ gotit:
* loading of newly created filesystems.
*/
babarrierwrite(ibp);
- cgp->cg_initediblk += INOPB(fs);
+
+ /*
+ * After the inode block is written, try to update the
+ * cg initediblk pointer. If another thread beat us
+ * to it, then leave it unchanged as the other thread
+ * has already set it correctly.
+ */
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ UFS_LOCK(ump);
+ ACTIVECLEAR(fs, cg);
+ UFS_UNLOCK(ump);
+ if (error != 0) {
+ brelse(bp);
+ return (error);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (cgp->cg_initediblk == old_initediblk)
+ cgp->cg_initediblk += INOPB(fs);
+ goto restart;
}
+ cgp->cg_old_time = cgp->cg_time = time_second;
+ cgp->cg_irotor = ipref;
UFS_LOCK(ump);
ACTIVECLEAR(fs, cg);
setbit(inosused, ipref);
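
The heart of the ffs_nodealloccg() change is the unlock/write/relock dance around cg_initediblk: record the counter, release the cylinder group buffer before the barrier write (avoiding the lock order problem with snaplk), then re-read the cylinder group and advance the counter only if nobody advanced it first. A userspace sketch of that optimistic-retry idea, with a mutex and a plain integer standing in for the cg buffer (assumptions throughout):

#include <pthread.h>
#include <stdio.h>

#define INOPB	64			/* inodes per block (sketch value) */

static pthread_mutex_t cg_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int initediblk;		/* cgp->cg_initediblk stand-in */

static void *
init_inode_block(void *arg)
{
	unsigned int old;

	pthread_mutex_lock(&cg_lock);
	old = initediblk;
	pthread_mutex_unlock(&cg_lock);

	/* ...bzero() and barrier-write the inode block, unlocked... */

	pthread_mutex_lock(&cg_lock);
	if (initediblk == old)	/* first writer back wins the update */
		initediblk += INOPB;
	pthread_mutex_unlock(&cg_lock);
	return (NULL);
}

int
main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, init_inode_block, NULL);
	pthread_create(&t2, NULL, init_inode_block, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	/*
	 * Prints 64 if the threads overlapped (the loser left the counter
	 * alone), or 128 if they ran back-to-back; a non-overlapping
	 * second thread legitimately initializes the next block.
	 */
	printf("initediblk = %u\n", initediblk);
	return (0);
}
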
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 16fe134..e39fd46 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1908,7 +1908,12 @@ softdep_flushfiles(oldmnt, flags, td)
int flags;
struct thread *td;
{
- int error, depcount, loopcnt, retry_flush_count, retry;
+#ifdef QUOTA
+ struct ufsmount *ump;
+ int i;
+#endif
+ int error, early, depcount, loopcnt, retry_flush_count, retry;
+ int morework;
loopcnt = 10;
retry_flush_count = 3;
@@ -1926,7 +1931,9 @@ retry_flush:
* Do another flush in case any vnodes were brought in
* as part of the cleanup operations.
*/
- if ((error = ffs_flushfiles(oldmnt, flags, td)) != 0)
+ early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag &
+ MNTK_UNMOUNT) == 0 ? 0 : EARLYFLUSH;
+ if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0)
break;
if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 ||
depcount == 0)
@@ -1950,7 +1957,17 @@ retry_flush:
MNT_ILOCK(oldmnt);
KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0,
("softdep_flushfiles: !MNTK_NOINSMNTQ"));
- if (oldmnt->mnt_nvnodelistsize > 0) {
+ morework = oldmnt->mnt_nvnodelistsize > 0;
+#ifdef QUOTA
+ ump = VFSTOUFS(oldmnt);
+ UFS_LOCK(ump);
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (ump->um_quotas[i] != NULLVP)
+ morework = 1;
+ }
+ UFS_UNLOCK(ump);
+#endif
+ if (morework) {
if (--retry_flush_count > 0) {
retry = 1;
loopcnt = 3;
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 0204613..b3292d0 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1351,9 +1351,10 @@ ffs_flushfiles(mp, flags, td)
struct thread *td;
{
struct ufsmount *ump;
- int error;
+ int qerror, error;
ump = VFSTOUFS(mp);
+ qerror = 0;
#ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) {
int i;
@@ -1361,11 +1362,19 @@ ffs_flushfiles(mp, flags, td)
if (error)
return (error);
for (i = 0; i < MAXQUOTAS; i++) {
- quotaoff(td, mp, i);
+ error = quotaoff(td, mp, i);
+ if (error != 0) {
+ if ((flags & EARLYFLUSH) == 0)
+ return (error);
+ else
+ qerror = error;
+ }
}
+
/*
- * Here we fall through to vflush again to ensure
- * that we have gotten rid of all the system vnodes.
+ * Here we fall through to vflush again to ensure that
+	 * we have gotten rid of all the system vnodes, unless
+	 * the quotas could not be closed.
*/
}
#endif
@@ -1380,11 +1389,21 @@ ffs_flushfiles(mp, flags, td)
* that we have gotten rid of all the system vnodes.
*/
}
- /*
- * Flush all the files.
+
+ /*
+ * Do not close system files if quotas were not closed, to be
+ * able to sync the remaining dquots. The freeblks softupdate
+ * workitems might hold a reference on a dquot, preventing
+ * quotaoff() from completing. Next round of
+ * softdep_flushworklist() iteration should process the
+ * blockers, allowing the next run of quotaoff() to finally
+ * flush held dquots.
+ *
+ * Otherwise, flush all the files.
*/
- if ((error = vflush(mp, 0, flags, td)) != 0)
+ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
return (error);
+
/*
* Flush filesystem metadata.
*/
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 87ac9a1..a949898 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -80,7 +80,7 @@ static int dqopen(struct vnode *, struct ufsmount *, int);
static int dqget(struct vnode *,
u_long, struct ufsmount *, int, struct dquot **);
static int dqsync(struct vnode *, struct dquot *);
-static void dqflush(struct vnode *);
+static int dqflush(struct vnode *);
static int quotaoff1(struct thread *td, struct mount *mp, int type);
static int quotaoff_inchange(struct thread *td, struct mount *mp, int type);
@@ -674,8 +674,12 @@ again:
vrele(vp);
}
- dqflush(qvp);
- /* Clear um_quotas before closing the quota vnode to prevent
+ error = dqflush(qvp);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Clear um_quotas before closing the quota vnode to prevent
* access to the closed vnode from dqget/dqsync
*/
UFS_LOCK(ump);
@@ -1594,17 +1598,19 @@ out:
/*
* Flush all entries from the cache for a particular vnode.
*/
-static void
+static int
dqflush(struct vnode *vp)
{
struct dquot *dq, *nextdq;
struct dqhash *dqh;
+ int error;
/*
* Move all dquot's that used to refer to this quota
* file off their hash chains (they will eventually
* fall off the head of the free list and be re-used).
*/
+ error = 0;
DQH_LOCK();
for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) {
for (dq = LIST_FIRST(dqh); dq; dq = nextdq) {
@@ -1612,12 +1618,15 @@ dqflush(struct vnode *vp)
if (dq->dq_ump->um_quotas[dq->dq_type] != vp)
continue;
if (dq->dq_cnt)
- panic("dqflush: stray dquot");
- LIST_REMOVE(dq, dq_hash);
- dq->dq_ump = (struct ufsmount *)0;
+ error = EBUSY;
+ else {
+ LIST_REMOVE(dq, dq_hash);
+ dq->dq_ump = NULL;
+ }
}
}
DQH_UNLOCK();
+ return (error);
}
/*