diff options
author | attilio <attilio@FreeBSD.org> | 2013-02-27 18:17:34 +0000 |
---|---|---|
committer | attilio <attilio@FreeBSD.org> | 2013-02-27 18:17:34 +0000 |
commit | 52c57fbbdb554a7ce0cdbb6bf27051ef70834bdf (patch) | |
tree | d0908474209a17865e044675940a2f62f9ff2493 /sys | |
parent | c74a3afc6a5d7d1ced989c36d4ba0a7d2bbc43b9 (diff) | |
download | FreeBSD-src-52c57fbbdb554a7ce0cdbb6bf27051ef70834bdf.zip FreeBSD-src-52c57fbbdb554a7ce0cdbb6bf27051ef70834bdf.tar.gz |
MFC
Diffstat (limited to 'sys')
25 files changed, 1064 insertions, 571 deletions
diff --git a/sys/arm/ti/ti_gpio.c b/sys/arm/ti/ti_gpio.c index 58de516..4edb10e 100644 --- a/sys/arm/ti/ti_gpio.c +++ b/sys/arm/ti/ti_gpio.c @@ -653,6 +653,9 @@ ti_gpio_attach(device_t dev) struct ti_gpio_softc *sc = device_get_softc(dev); unsigned int i; int err = 0; + int pin; + uint32_t flags; + uint32_t reg_oe; sc->sc_dev = dev; @@ -720,6 +723,17 @@ ti_gpio_attach(device_t dev) /* Disable interrupts for all pins */ ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE1, 0xffffffff); ti_gpio_write_4(sc, i, TI_GPIO_CLEARIRQENABLE2, 0xffffffff); + + /* Init OE register based on pads configuration */ + reg_oe = 0xffffffff; + for (pin = 0; pin < 32; pin++) { + ti_scm_padconf_get_gpioflags( + PINS_PER_BANK*i + pin, &flags); + if (flags & GPIO_PIN_OUTPUT) + reg_oe &= ~(1U << pin); + } + + ti_gpio_write_4(sc, i, TI_GPIO_OE, reg_oe); } } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index e81dc02..d6651f9 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -48,6 +48,14 @@ uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ /* + * The in-core space map representation is more compact than its on-disk form. + * The zfs_condense_pct determines how much more compact the in-core + * space_map representation must be before we compact it on-disk. + * Values should be greater than or equal to 100. + */ +int zfs_condense_pct = 200; + +/* * This value defines the number of allowed allocation failures per vdev. * If a device reaches this threshold in a given txg then we consider skipping * allocations on that device. @@ -215,9 +223,9 @@ metaslab_compare(const void *x1, const void *x2) /* * If the weights are identical, use the offset to force uniqueness. 
*/ - if (m1->ms_map.sm_start < m2->ms_map.sm_start) + if (m1->ms_map->sm_start < m2->ms_map->sm_start) return (-1); - if (m1->ms_map.sm_start > m2->ms_map.sm_start) + if (m1->ms_map->sm_start > m2->ms_map->sm_start) return (1); ASSERT3P(m1, ==, m2); @@ -732,14 +740,15 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, * addition of new space; and for debugging, it ensures that we'd * data fault on any attempt to use this metaslab before it's ready. */ - space_map_create(&msp->ms_map, start, size, + msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_SLEEP); + space_map_create(msp->ms_map, start, size, vd->vdev_ashift, &msp->ms_lock); metaslab_group_add(mg, msp); if (metaslab_debug && smo->smo_object != 0) { mutex_enter(&msp->ms_lock); - VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops, + VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops, SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0); mutex_exit(&msp->ms_lock); } @@ -767,22 +776,27 @@ metaslab_fini(metaslab_t *msp) metaslab_group_t *mg = msp->ms_group; vdev_space_update(mg->mg_vd, - -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); + -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size); metaslab_group_remove(mg, msp); mutex_enter(&msp->ms_lock); - space_map_unload(&msp->ms_map); - space_map_destroy(&msp->ms_map); + space_map_unload(msp->ms_map); + space_map_destroy(msp->ms_map); + kmem_free(msp->ms_map, sizeof (*msp->ms_map)); for (int t = 0; t < TXG_SIZE; t++) { - space_map_destroy(&msp->ms_allocmap[t]); - space_map_destroy(&msp->ms_freemap[t]); + space_map_destroy(msp->ms_allocmap[t]); + space_map_destroy(msp->ms_freemap[t]); + kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t])); + kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t])); } - for (int t = 0; t < TXG_DEFER_SIZE; t++) - space_map_destroy(&msp->ms_defermap[t]); + for (int t = 0; t < TXG_DEFER_SIZE; t++) { + space_map_destroy(msp->ms_defermap[t]); + kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t])); + 
} ASSERT0(msp->ms_deferspace); @@ -801,7 +815,7 @@ static uint64_t metaslab_weight(metaslab_t *msp) { metaslab_group_t *mg = msp->ms_group; - space_map_t *sm = &msp->ms_map; + space_map_t *sm = msp->ms_map; space_map_obj_t *smo = &msp->ms_smo; vdev_t *vd = mg->mg_vd; uint64_t weight, space; @@ -809,6 +823,16 @@ metaslab_weight(metaslab_t *msp) ASSERT(MUTEX_HELD(&msp->ms_lock)); /* + * This vdev is in the process of being removed so there is nothing + * for us to do here. + */ + if (vd->vdev_removing) { + ASSERT0(smo->smo_alloc); + ASSERT0(vd->vdev_ms_shift); + return (0); + } + + /* * The baseline weight is the metaslab's free space. */ space = sm->sm_size - smo->smo_alloc; @@ -861,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg) * Prefetch the next potential metaslabs */ for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) { - space_map_t *sm = &msp->ms_map; + space_map_t *sm = msp->ms_map; space_map_obj_t *smo = &msp->ms_smo; /* If we have reached our prefetch limit then we're done */ @@ -882,7 +906,7 @@ static int metaslab_activate(metaslab_t *msp, uint64_t activation_weight) { metaslab_group_t *mg = msp->ms_group; - space_map_t *sm = &msp->ms_map; + space_map_t *sm = msp->ms_map; space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; ASSERT(MUTEX_HELD(&msp->ms_lock)); @@ -899,7 +923,7 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight) return (error); } for (int t = 0; t < TXG_DEFER_SIZE; t++) - space_map_walk(&msp->ms_defermap[t], + space_map_walk(msp->ms_defermap[t], space_map_claim, sm); } @@ -930,12 +954,158 @@ metaslab_passivate(metaslab_t *msp, uint64_t size) * this metaslab again. In that case, it had better be empty, * or we would be leaving space on the table. 
*/ - ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0); + ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0); metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size)); ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); } /* + * Determine if the in-core space map representation can be condensed on-disk. + * We would like to use the following criteria to make our decision: + * + * 1. The size of the space map object should not dramatically increase as a + * result of writing out our in-core free map. + * + * 2. The minimal on-disk space map representation is zfs_condense_pct/100 + * times the size of the in-core representation (i.e. zfs_condense_pct = 110 + * and in-core = 1MB, minimal = 1.1MB). + * + * Checking the first condition is tricky since we don't want to walk + * the entire AVL tree calculating the estimated on-disk size. Instead we + * use the size-ordered AVL tree in the space map and calculate the + * size required for the largest segment in our in-core free map. If the + * size required to represent that segment on disk is larger than the space + * map object then we avoid condensing this map. + * + * To determine the second criterion we use a best-case estimate and assume + * each segment can be represented on-disk as a single 64-bit entry. We refer + * to this best-case estimate as the space map's minimal form. + */ +static boolean_t +metaslab_should_condense(metaslab_t *msp) +{ + space_map_t *sm = msp->ms_map; + space_map_obj_t *smo = &msp->ms_smo_syncing; + space_seg_t *ss; + uint64_t size, entries, segsz; + + ASSERT(MUTEX_HELD(&msp->ms_lock)); + ASSERT(sm->sm_loaded); + + /* + * Use the sm_pp_root AVL tree, which is ordered by size, to obtain + * the largest segment in the in-core free map. If the tree is + * empty then we should condense the map. 
+ */ + ss = avl_last(sm->sm_pp_root); + if (ss == NULL) + return (B_TRUE); + + /* + * Calculate the number of 64-bit entries this segment would + * require when written to disk. If this single segment would be + * larger on-disk than the entire current on-disk structure, then + * clearly condensing will increase the on-disk structure size. + */ + size = (ss->ss_end - ss->ss_start) >> sm->sm_shift; + entries = size / (MIN(size, SM_RUN_MAX)); + segsz = entries * sizeof (uint64_t); + + return (segsz <= smo->smo_objsize && + smo->smo_objsize >= (zfs_condense_pct * + sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100); +} + +/* + * Condense the on-disk space map representation to its minimized form. + * The minimized form consists of a small number of allocations followed by + * the in-core free map. + */ +static void +metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx) +{ + spa_t *spa = msp->ms_group->mg_vd->vdev_spa; + space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK]; + space_map_t condense_map; + space_map_t *sm = msp->ms_map; + objset_t *mos = spa_meta_objset(spa); + space_map_obj_t *smo = &msp->ms_smo_syncing; + + ASSERT(MUTEX_HELD(&msp->ms_lock)); + ASSERT3U(spa_sync_pass(spa), ==, 1); + ASSERT(sm->sm_loaded); + + spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, " + "smo size %llu, segments %lu", txg, + (msp->ms_map->sm_start / msp->ms_map->sm_size), msp, + smo->smo_objsize, avl_numnodes(&sm->sm_root)); + + /* + * Create a map that is a 100% allocated map. We remove segments + * that have been freed in this txg, any deferred frees that exist, + * and any allocation in the future. Removing segments should be + * a relatively inexpensive operation since we expect these maps to + * have a small number of nodes. 
+ */ + space_map_create(&condense_map, sm->sm_start, sm->sm_size, + sm->sm_shift, sm->sm_lock); + space_map_add(&condense_map, condense_map.sm_start, + condense_map.sm_size); + + /* + * Remove what's been freed in this txg from the condense_map. + * Since we're in sync_pass 1, we know that all the frees from + * this txg are in the freemap. + */ + space_map_walk(freemap, space_map_remove, &condense_map); + + for (int t = 0; t < TXG_DEFER_SIZE; t++) + space_map_walk(msp->ms_defermap[t], + space_map_remove, &condense_map); + + for (int t = 1; t < TXG_CONCURRENT_STATES; t++) + space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK], + space_map_remove, &condense_map); + + /* + * We're about to drop the metaslab's lock thus allowing + * other consumers to change its content. Set the + * space_map's sm_condensing flag to ensure that + * allocations on this metaslab do not occur while we're + * in the middle of committing it to disk. This is only critical + * for the ms_map as all other space_maps use per txg + * views of their content. + */ + sm->sm_condensing = B_TRUE; + + mutex_exit(&msp->ms_lock); + space_map_truncate(smo, mos, tx); + mutex_enter(&msp->ms_lock); + + /* + * While we would ideally like to create a space_map representation + * that consists only of allocation records, doing so can be + * prohibitively expensive because the in-core free map can be + * large, and therefore computationally expensive to subtract + * from the condense_map. Instead we sync out two maps, a cheap + * allocation only map followed by the in-core free map. While not + * optimal, this is typically close to optimal, and much cheaper to + * compute. 
+ */ + space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx); + space_map_vacate(&condense_map, NULL, NULL); + space_map_destroy(&condense_map); + + space_map_sync(sm, SM_FREE, smo, mos, tx); + sm->sm_condensing = B_FALSE; + + spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, " + "smo size %llu", txg, + (msp->ms_map->sm_start / msp->ms_map->sm_size), msp, + smo->smo_objsize); +} + +/* * Write a metaslab to disk in the context of the specified transaction group. */ void @@ -944,17 +1114,29 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) vdev_t *vd = msp->ms_group->mg_vd; spa_t *spa = vd->vdev_spa; objset_t *mos = spa_meta_objset(spa); - space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK]; - space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK]; - space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; - space_map_t *sm = &msp->ms_map; + space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK]; + space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK]; + space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; + space_map_t *sm = msp->ms_map; space_map_obj_t *smo = &msp->ms_smo_syncing; dmu_buf_t *db; dmu_tx_t *tx; ASSERT(!vd->vdev_ishole); - if (allocmap->sm_space == 0 && freemap->sm_space == 0) + /* + * This metaslab has just been added so there's no work to do now. 
+ */ + if (*freemap == NULL) { + ASSERT3P(allocmap, ==, NULL); + return; + } + + ASSERT3P(allocmap, !=, NULL); + ASSERT3P(*freemap, !=, NULL); + ASSERT3P(*freed_map, !=, NULL); + + if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0) return; /* @@ -982,49 +1164,36 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) mutex_enter(&msp->ms_lock); - space_map_walk(freemap, space_map_add, freed_map); - - if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >= - 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) { - /* - * The in-core space map representation is twice as compact - * as the on-disk one, so it's time to condense the latter - * by generating a pure allocmap from first principles. - * - * This metaslab is 100% allocated, - * minus the content of the in-core map (sm), - * minus what's been freed this txg (freed_map), - * minus deferred frees (ms_defermap[]), - * minus allocations from txgs in the future - * (because they haven't been committed yet). - */ - space_map_vacate(allocmap, NULL, NULL); - space_map_vacate(freemap, NULL, NULL); - - space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size); - - space_map_walk(sm, space_map_remove, allocmap); - space_map_walk(freed_map, space_map_remove, allocmap); + if (sm->sm_loaded && spa_sync_pass(spa) == 1 && + metaslab_should_condense(msp)) { + metaslab_condense(msp, txg, tx); + } else { + space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); + space_map_sync(*freemap, SM_FREE, smo, mos, tx); + } - for (int t = 0; t < TXG_DEFER_SIZE; t++) - space_map_walk(&msp->ms_defermap[t], - space_map_remove, allocmap); + space_map_vacate(allocmap, NULL, NULL); - for (int t = 1; t < TXG_CONCURRENT_STATES; t++) - space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK], - space_map_remove, allocmap); - - mutex_exit(&msp->ms_lock); - space_map_truncate(smo, mos, tx); - mutex_enter(&msp->ms_lock); + /* + * For sync pass 1, we avoid walking the entire space map and + * instead will just swap the pointers for freemap 
and + * freed_map. We can safely do this since the freed_map is + * guaranteed to be empty on the initial pass. + */ + if (spa_sync_pass(spa) == 1) { + ASSERT0((*freed_map)->sm_space); + ASSERT0(avl_numnodes(&(*freed_map)->sm_root)); + space_map_swap(freemap, freed_map); + } else { + space_map_vacate(*freemap, space_map_add, *freed_map); } - space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); - space_map_sync(freemap, SM_FREE, smo, mos, tx); + ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space); + ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space); mutex_exit(&msp->ms_lock); - VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, sizeof (*smo)); bcopy(smo, db->db_data, sizeof (*smo)); @@ -1042,9 +1211,9 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) { space_map_obj_t *smo = &msp->ms_smo; space_map_obj_t *smosync = &msp->ms_smo_syncing; - space_map_t *sm = &msp->ms_map; - space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; - space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; + space_map_t *sm = msp->ms_map; + space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; + space_map_t **defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; metaslab_group_t *mg = msp->ms_group; vdev_t *vd = mg->mg_vd; int64_t alloc_delta, defer_delta; @@ -1055,40 +1224,57 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) /* * If this metaslab is just becoming available, initialize its - * allocmaps and freemaps and add its capacity to the vdev. + * allocmaps, freemaps, and defermap and add its capacity to the vdev. 
*/ - if (freed_map->sm_size == 0) { + if (*freed_map == NULL) { + ASSERT(*defer_map == NULL); for (int t = 0; t < TXG_SIZE; t++) { - space_map_create(&msp->ms_allocmap[t], sm->sm_start, + msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t), + KM_SLEEP); + space_map_create(msp->ms_allocmap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); - space_map_create(&msp->ms_freemap[t], sm->sm_start, + msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t), + KM_SLEEP); + space_map_create(msp->ms_freemap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); } - for (int t = 0; t < TXG_DEFER_SIZE; t++) - space_map_create(&msp->ms_defermap[t], sm->sm_start, + for (int t = 0; t < TXG_DEFER_SIZE; t++) { + msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t), + KM_SLEEP); + space_map_create(msp->ms_defermap[t], sm->sm_start, sm->sm_size, sm->sm_shift, sm->sm_lock); + } + + freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; + defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; vdev_space_update(vd, 0, 0, sm->sm_size); } alloc_delta = smosync->smo_alloc - smo->smo_alloc; - defer_delta = freed_map->sm_space - defer_map->sm_space; + defer_delta = (*freed_map)->sm_space - (*defer_map)->sm_space; vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0); - ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0); - ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0); + ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0); + ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0); /* * If there's a space_map_load() in progress, wait for it to complete * so that we have a consistent view of the in-core space map. - * Then, add defer_map (oldest deferred frees) to this map and - * transfer freed_map (this txg's frees) to defer_map. */ space_map_load_wait(sm); - space_map_vacate(defer_map, sm->sm_loaded ? 
space_map_free : NULL, sm); - space_map_vacate(freed_map, space_map_add, defer_map); + + /* + * Move the frees from the defer_map to this map (if it's loaded). + * Swap the freed_map and the defer_map -- this is safe to do + * because we've just emptied out the defer_map. + */ + space_map_vacate(*defer_map, sm->sm_loaded ? space_map_free : NULL, sm); + ASSERT0((*defer_map)->sm_space); + ASSERT0(avl_numnodes(&(*defer_map)->sm_root)); + space_map_swap(freed_map, defer_map); *smo = *smosync; @@ -1112,7 +1298,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) int evictable = 1; for (int t = 1; t < TXG_CONCURRENT_STATES; t++) - if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space) + if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space) evictable = 0; if (evictable && !metaslab_debug) @@ -1137,7 +1323,7 @@ metaslab_sync_reassess(metaslab_group_t *mg) for (int m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; - if (msp->ms_map.sm_start > mg->mg_bonus_area) + if (msp->ms_map->sm_start > mg->mg_bonus_area) break; mutex_enter(&msp->ms_lock); @@ -1158,7 +1344,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) { uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift; uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift; - uint64_t start = msp->ms_map.sm_start >> ms_shift; + uint64_t start = msp->ms_map->sm_start >> ms_shift; if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva)) return (1ULL << 63); @@ -1206,6 +1392,13 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, mutex_exit(&mg->mg_lock); return (-1ULL); } + + /* + * If the selected metaslab is condensing, skip it. 
+ */ + if (msp->ms_map->sm_condensing) + continue; + was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -1271,20 +1464,30 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, continue; } - if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) + /* + * If this metaslab is currently condensing then pick again as + * we can't manipulate this metaslab until it's committed + * to disk. + */ + if (msp->ms_map->sm_condensing) { + mutex_exit(&msp->ms_lock); + continue; + } + + if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL) break; atomic_inc_64(&mg->mg_alloc_failures); - metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); + metaslab_passivate(msp, space_map_maxsize(msp->ms_map)); mutex_exit(&msp->ms_lock); } - if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) + if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); + space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize); mutex_exit(&msp->ms_lock); @@ -1516,13 +1719,13 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) mutex_enter(&msp->ms_lock); if (now) { - space_map_remove(&msp->ms_allocmap[txg & TXG_MASK], + space_map_remove(msp->ms_allocmap[txg & TXG_MASK], offset, size); - space_map_free(&msp->ms_map, offset, size); + space_map_free(msp->ms_map, offset, size); } else { - if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0) + if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0) vdev_dirty(vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size); + space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size); } mutex_exit(&msp->ms_lock); @@ -1557,10 +1760,10 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); - if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) + if ((txg 
!= 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded) error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); - if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) + if (error == 0 && !space_map_contains(msp->ms_map, offset, size)) error = ENOENT; if (error || txg == 0) { /* txg == 0 indicates dry run */ @@ -1568,12 +1771,12 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) return (error); } - space_map_claim(&msp->ms_map, offset, size); + space_map_claim(msp->ms_map, offset, size); if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ - if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) + if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0) vdev_dirty(vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); + space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size); } mutex_exit(&msp->ms_lock); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c index bebb0f3..190fefe 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c @@ -114,6 +114,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) int merge_before, merge_after; ASSERT(MUTEX_HELD(sm->sm_lock)); + VERIFY(!sm->sm_condensing); VERIFY(size != 0); VERIFY3U(start, >=, sm->sm_start); VERIFY3U(end, <=, sm->sm_start + sm->sm_size); @@ -198,6 +199,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) int left_over, right_over; ASSERT(MUTEX_HELD(sm->sm_lock)); + VERIFY(!sm->sm_condensing); VERIFY(size != 0); VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); @@ -267,6 +269,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) } void +space_map_swap(space_map_t **msrc, space_map_t **mdst) +{ + space_map_t *sm; + + ASSERT(MUTEX_HELD((*msrc)->sm_lock)); + 
ASSERT0((*mdst)->sm_space); + ASSERT0(avl_numnodes(&(*mdst)->sm_root)); + + sm = *msrc; + *msrc = *mdst; + *mdst = sm; +} + +void space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) { space_seg_t *ss; @@ -447,9 +463,9 @@ space_map_sync(space_map_t *sm, uint8_t maptype, space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) { spa_t *spa = dmu_objset_spa(os); - void *cookie = NULL; + avl_tree_t *t = &sm->sm_root; space_seg_t *ss; - uint64_t bufsize, start, size, run_len, delta, sm_space; + uint64_t bufsize, start, size, run_len, total, sm_space, nodes; uint64_t *entry, *entry_map, *entry_map_end; ASSERT(MUTEX_HELD(sm->sm_lock)); @@ -478,13 +494,14 @@ space_map_sync(space_map_t *sm, uint8_t maptype, SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); - delta = 0; + total = 0; + nodes = avl_numnodes(&sm->sm_root); sm_space = sm->sm_space; - while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { + for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) { size = ss->ss_end - ss->ss_start; start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; - delta += size; + total += size; size >>= sm->sm_shift; while (size) { @@ -506,7 +523,6 @@ space_map_sync(space_map_t *sm, uint8_t maptype, start += run_len; size -= run_len; } - kmem_cache_free(space_seg_cache, ss); } if (entry != entry_map) { @@ -522,12 +538,11 @@ space_map_sync(space_map_t *sm, uint8_t maptype, * Ensure that the space_map's accounting wasn't changed * while we were in the middle of writing it out. 
*/ + VERIFY3U(nodes, ==, avl_numnodes(&sm->sm_root)); VERIFY3U(sm->sm_space, ==, sm_space); + VERIFY3U(sm->sm_space, ==, total); zio_buf_free(entry_map, bufsize); - - sm->sm_space -= delta; - VERIFY0(sm->sm_space); } void diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h index f1f1b38..138e14e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h @@ -66,20 +66,38 @@ struct metaslab_group { }; /* - * Each metaslab's free space is tracked in space map object in the MOS, - * which is only updated in syncing context. Each time we sync a txg, + * Each metaslab maintains an in-core free map (ms_map) that contains the + * current list of free segments. As blocks are allocated, the allocated + * segment is removed from the ms_map and added to a per txg allocation map. + * As blocks are freed, they are added to the per txg free map. These per + * txg maps allow us to process all allocations and frees in syncing context + * where it is safe to update the on-disk space maps. + * + * Each metaslab's free space is tracked in a space map object in the MOS, + * which is only updated in syncing context. Each time we sync a txg, * we append the allocs and frees from that txg to the space map object. * When the txg is done syncing, metaslab_sync_done() updates ms_smo - * to ms_smo_syncing. Everything in ms_smo is always safe to allocate. + * to ms_smo_syncing. Everything in ms_smo is always safe to allocate. + * + * To load the in-core free map we read the space map object from disk. + * This object contains a series of alloc and free records that are + * combined to make up the list of all free segments in this metaslab. These + * segments are represented in-core by the ms_map and are stored in an + * AVL tree. 
+ * + * As the space map object grows (as a result of the appends) it will + * eventually become space-inefficient. When the space map object is + * zfs_condense_pct/100 times the size of the minimal on-disk representation, + * we rewrite it in its minimized form. */ struct metaslab { kmutex_t ms_lock; /* metaslab lock */ space_map_obj_t ms_smo; /* synced space map object */ space_map_obj_t ms_smo_syncing; /* syncing space map object */ - space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */ - space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */ - space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */ - space_map_t ms_map; /* in-core free space map */ + space_map_t *ms_allocmap[TXG_SIZE]; /* allocated this txg */ + space_map_t *ms_freemap[TXG_SIZE]; /* freed this txg */ + space_map_t *ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */ + space_map_t *ms_map; /* in-core free space map */ int64_t ms_deferspace; /* sum of ms_defermap[] space */ uint64_t ms_weight; /* weight vs. others in group */ metaslab_group_t *ms_group; /* metaslab group */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h index 463b6bb..2da50fb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h @@ -40,17 +40,17 @@ extern "C" { typedef struct space_map_ops space_map_ops_t; typedef struct space_map { - avl_tree_t sm_root; /* AVL tree of map segments */ + avl_tree_t sm_root; /* offset-ordered segment AVL tree */ uint64_t sm_space; /* sum of all segments in the map */ uint64_t sm_start; /* start of map */ uint64_t sm_size; /* size of map */ uint8_t sm_shift; /* unit shift */ - uint8_t sm_pad[3]; /* unused */ uint8_t sm_loaded; /* map loaded? */ uint8_t sm_loading; /* map loading? */ + uint8_t sm_condensing; /* map condensing? 
*/ kcondvar_t sm_load_cv; /* map load completion */ space_map_ops_t *sm_ops; /* space map block picker ops vector */ - avl_tree_t *sm_pp_root; /* picker-private AVL tree */ + avl_tree_t *sm_pp_root; /* size-ordered, picker-private tree */ void *sm_ppd; /* picker-private data */ kmutex_t *sm_lock; /* pointer to lock that protects map */ } space_map_t; @@ -149,6 +149,7 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size); extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size); extern boolean_t space_map_contains(space_map_t *sm, uint64_t start, uint64_t size); +extern void space_map_swap(space_map_t **msrc, space_map_t **mdest); extern void space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest); extern void space_map_walk(space_map_t *sm, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 59b461b..be5b0bf 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -1847,6 +1847,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) space_map_truncate(smo, mos, tx); space_map_sync(&smsync, SM_ALLOC, smo, mos, tx); + space_map_vacate(&smsync, NULL, NULL); space_map_destroy(&smsync); diff --git a/sys/dev/ath/ath_hal/ah.c b/sys/dev/ath/ath_hal/ah.c index d1ce7a8..551c225 100644 --- a/sys/dev/ath/ath_hal/ah.c +++ b/sys/dev/ath/ath_hal/ah.c @@ -692,6 +692,10 @@ ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, return pCap->hal4AddrAggrSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_EXT_CHAN_DFS: return pCap->halExtChanDfsSupport ? HAL_OK : HAL_ENOTSUPP; + case HAL_CAP_RX_STBC: + return pCap->halRxStbcSupport ? HAL_OK : HAL_ENOTSUPP; + case HAL_CAP_TX_STBC: + return pCap->halTxStbcSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_COMBINED_RADAR_RSSI: return pCap->halUseCombinedRadarRssi ? 
HAL_OK : HAL_ENOTSUPP; case HAL_CAP_AUTO_SLEEP: diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h index 0e3d5ab..ca2e7ca 100644 --- a/sys/dev/ath/ath_hal/ah.h +++ b/sys/dev/ath/ath_hal/ah.h @@ -137,6 +137,9 @@ typedef enum { HAL_CAP_RIFS_RX_ENABLED = 53, HAL_CAP_BB_DFS_HANG = 54, + HAL_CAP_RX_STBC = 58, + HAL_CAP_TX_STBC = 59, + HAL_CAP_BT_COEX = 60, /* hardware is capable of bluetooth coexistence */ HAL_CAP_DYNAMIC_SMPS = 61, /* Dynamic MIMO Power Save hardware support */ diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c index a7d6af6..b3f82fa 100644 --- a/sys/dev/ath/ath_rate/sample/sample.c +++ b/sys/dev/ath/ath_rate/sample/sample.c @@ -708,71 +708,6 @@ ath_rate_setupxtxdesc(struct ath_softc *sc, struct ath_node *an, s3code, sched->t3); /* series 3 */ } -/* - * Update the EWMA percentage. - * - * This is a simple hack to track an EWMA based on the current - * rate scenario. For the rate codes which failed, this will - * record a 0% against it. For the rate code which succeeded, - * EWMA will record the nbad*100/nframes percentage against it. 
- */ -static void -update_ewma_stats(struct ath_softc *sc, struct ath_node *an, - int frame_size, - int rix0, int tries0, - int rix1, int tries1, - int rix2, int tries2, - int rix3, int tries3, - int short_tries, int tries, int status, - int nframes, int nbad) -{ - struct sample_node *sn = ATH_NODE_SAMPLE(an); - struct sample_softc *ssc = ATH_SOFTC_SAMPLE(sc); - const int size_bin = size_to_bin(frame_size); - int tries_so_far; - int pct; - int rix = rix0; - - /* Calculate percentage based on current rate */ - if (nframes == 0) - nframes = nbad = 1; - pct = ((nframes - nbad) * 1000) / nframes; - - /* Figure out which rate index succeeded */ - tries_so_far = tries0; - - if (tries1 && tries_so_far < tries) { - tries_so_far += tries1; - rix = rix1; - /* XXX bump ewma pct */ - } - - if (tries2 && tries_so_far < tries) { - tries_so_far += tries2; - rix = rix2; - /* XXX bump ewma pct */ - } - - if (tries3 && tries_so_far < tries) { - rix = rix3; - /* XXX bump ewma pct */ - } - - /* rix is the successful rate, update EWMA for final rix */ - if (sn->stats[size_bin][rix].total_packets < - ssc->smoothing_minpackets) { - /* just average the first few packets */ - int a_pct = (sn->stats[size_bin][rix].packets_acked * 1000) / - (sn->stats[size_bin][rix].total_packets); - sn->stats[size_bin][rix].ewma_pct = a_pct; - } else { - /* use a ewma */ - sn->stats[size_bin][rix].ewma_pct = - ((sn->stats[size_bin][rix].ewma_pct * ssc->smoothing_rate) + - (pct * (100 - ssc->smoothing_rate))) / 100; - } -} - static void update_stats(struct ath_softc *sc, struct ath_node *an, int frame_size, @@ -792,6 +727,7 @@ update_stats(struct ath_softc *sc, struct ath_node *an, const int size = bin_to_size(size_bin); int tt, tries_so_far; int is_ht40 = (an->an_node.ni_chw == 40); + int pct; if (!IS_RATE_DEFINED(sn, rix0)) return; @@ -865,6 +801,27 @@ update_stats(struct ath_softc *sc, struct ath_node *an, sn->stats[size_bin][rix0].last_tx = ticks; sn->stats[size_bin][rix0].total_packets += nframes; + /* 
update EWMA for this rix */ + + /* Calculate percentage based on current rate */ + if (nframes == 0) + nframes = nbad = 1; + pct = ((nframes - nbad) * 1000) / nframes; + + if (sn->stats[size_bin][rix0].total_packets < + ssc->smoothing_minpackets) { + /* just average the first few packets */ + int a_pct = (sn->stats[size_bin][rix0].packets_acked * 1000) / + (sn->stats[size_bin][rix0].total_packets); + sn->stats[size_bin][rix0].ewma_pct = a_pct; + } else { + /* use a ewma */ + sn->stats[size_bin][rix0].ewma_pct = + ((sn->stats[size_bin][rix0].ewma_pct * ssc->smoothing_rate) + + (pct * (100 - ssc->smoothing_rate))) / 100; + } + + if (rix0 == sn->current_sample_rix[size_bin]) { IEEE80211_NOTE(an->an_node.ni_vap, IEEE80211_MSG_RATECTL, &an->an_node, @@ -907,6 +864,11 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, short_tries = ts->ts_shortretry; long_tries = ts->ts_longretry + 1; + if (nframes == 0) { + device_printf(sc->sc_dev, "%s: nframes=0?\n", __func__); + return; + } + if (frame_size == 0) /* NB: should not happen */ frame_size = 1500; @@ -950,13 +912,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, 0, 0, short_tries, long_tries, status, nframes, nbad); - update_ewma_stats(sc, an, frame_size, - final_rix, long_tries, - 0, 0, - 0, 0, - 0, 0, - short_tries, long_tries, status, - nframes, nbad); } else { int finalTSIdx = ts->ts_finaltsi; @@ -1008,15 +963,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, short_tries, long_tries, long_tries > rc[0].tries, nframes, nbad); - update_ewma_stats(sc, an, frame_size, - rc[0].rix, rc[0].tries, - rc[1].rix, rc[1].tries, - rc[2].rix, rc[2].tries, - rc[3].rix, rc[3].tries, - short_tries, long_tries, - long_tries > rc[0].tries, - nframes, nbad); - long_tries -= rc[0].tries; } @@ -1029,14 +975,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, short_tries, long_tries, status, nframes, nbad); - update_ewma_stats(sc, an, frame_size, - rc[1].rix, rc[1].tries, 
- rc[2].rix, rc[2].tries, - rc[3].rix, rc[3].tries, - 0, 0, - short_tries, long_tries, - status, - nframes, nbad); long_tries -= rc[1].tries; } @@ -1049,14 +987,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, short_tries, long_tries, status, nframes, nbad); - update_ewma_stats(sc, an, frame_size, - rc[2].rix, rc[2].tries, - rc[3].rix, rc[3].tries, - 0, 0, - 0, 0, - short_tries, long_tries, - status, - nframes, nbad); long_tries -= rc[2].tries; } @@ -1069,14 +999,6 @@ ath_rate_tx_complete(struct ath_softc *sc, struct ath_node *an, short_tries, long_tries, status, nframes, nbad); - update_ewma_stats(sc, an, frame_size, - rc[3].rix, rc[3].tries, - 0, 0, - 0, 0, - 0, 0, - short_tries, long_tries, - status, - nframes, nbad); } } } diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index a614d6f..fd1a7c3 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -781,6 +781,28 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) ic->ic_txstream = txs; ic->ic_rxstream = rxs; + /* + * Setup TX and RX STBC based on what the HAL allows and + * the currently configured chainmask set. + * Ie - don't enable STBC TX if only one chain is enabled. + * STBC RX is fine on a single RX chain; it just won't + * provide any real benefit. 
+ */ + if (ath_hal_getcapability(ah, HAL_CAP_RX_STBC, 0, + NULL) == HAL_OK) { + sc->sc_rx_stbc = 1; + device_printf(sc->sc_dev, + "[HT] 1 stream STBC receive enabled\n"); + ic->ic_htcaps |= IEEE80211_HTCAP_RXSTBC_1STREAM; + } + if (txs > 1 && ath_hal_getcapability(ah, HAL_CAP_TX_STBC, 0, + NULL) == HAL_OK) { + sc->sc_tx_stbc = 1; + device_printf(sc->sc_dev, + "[HT] 1 stream STBC transmit enabled\n"); + ic->ic_htcaps |= IEEE80211_HTCAP_TXSTBC; + } + (void) ath_hal_getcapability(ah, HAL_CAP_RTS_AGGR_LIMIT, 1, &sc->sc_rts_aggr_limit); if (sc->sc_rts_aggr_limit != (64 * 1024)) diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index c0e72ac..d382f8f 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -536,16 +536,29 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, series[i].RateFlags |= HAL_RATESERIES_HALFGI; /* - * XXX TODO: STBC if it's possible + * Setup rate and TX power cap for this series. */ + series[i].Rate = rt->info[rc[i].rix].rateCode; + series[i].RateIndex = rc[i].rix; + series[i].tx_power_cap = 0x3f; /* XXX for now */ + /* - * XXX TODO: LDPC if it's possible + * If we have STBC TX enabled and the receiver + * can receive (at least) 1 stream STBC, AND it's + * MCS 0-7, AND we have at least two chains enabled, + * enable STBC. 
*/ + if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC && + ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM && + (sc->sc_cur_txchainmask > 1) && + HT_RC_2_STREAMS(series[i].Rate) == 1) { + series[i].RateFlags |= HAL_RATESERIES_STBC; + } - series[i].Rate = rt->info[rc[i].rix].rateCode; - series[i].RateIndex = rc[i].rix; - series[i].tx_power_cap = 0x3f; /* XXX for now */ + /* + * XXX TODO: LDPC if it's possible + */ /* * PktDuration doesn't include slot, ACK, RTS, etc timing - diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h index e8fdeff..42442de 100644 --- a/sys/dev/ath/if_athvar.h +++ b/sys/dev/ath/if_athvar.h @@ -567,7 +567,9 @@ struct ath_softc { /* * Second set of flags. */ - u_int32_t sc_use_ent : 1; + u_int32_t sc_use_ent : 1, + sc_rx_stbc : 1, + sc_tx_stbc : 1; /* * Enterprise mode configuration for AR9380 and later chipsets. diff --git a/sys/dev/mfi/mfi.c b/sys/dev/mfi/mfi.c index ed759fc..e799b9d 100644 --- a/sys/dev/mfi/mfi.c +++ b/sys/dev/mfi/mfi.c @@ -108,6 +108,7 @@ static void mfi_bio_complete(struct mfi_command *); static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*); static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*); static int mfi_send_frame(struct mfi_softc *, struct mfi_command *); +static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_abort(struct mfi_softc *, struct mfi_command **); static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *); static void mfi_timeout(void *); @@ -132,24 +133,30 @@ static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm); SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters"); static int mfi_event_locale = MFI_EVT_LOCALE_ALL; TUNABLE_INT("hw.mfi.event_locale", &mfi_event_locale); -SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RW, &mfi_event_locale, - 0, "event message locale"); +SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale, + 0, 
"event message locale"); static int mfi_event_class = MFI_EVT_CLASS_INFO; TUNABLE_INT("hw.mfi.event_class", &mfi_event_class); -SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RW, &mfi_event_class, - 0, "event message class"); +SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class, + 0, "event message class"); static int mfi_max_cmds = 128; TUNABLE_INT("hw.mfi.max_cmds", &mfi_max_cmds); -SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RD, &mfi_max_cmds, - 0, "Max commands"); +SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds, + 0, "Max commands limit (-1 = controller limit)"); static int mfi_detect_jbod_change = 1; TUNABLE_INT("hw.mfi.detect_jbod_change", &mfi_detect_jbod_change); -SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RW, +SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN, &mfi_detect_jbod_change, 0, "Detect a change to a JBOD"); +int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS; +TUNABLE_INT("hw.mfi.polled_cmd_timeout", &mfi_polled_cmd_timeout); +SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN, + &mfi_polled_cmd_timeout, 0, + "Polled command timeout - used for firmware flash etc (in seconds)"); + /* Management interface */ static d_open_t mfi_open; static d_close_t mfi_close; @@ -361,7 +368,7 @@ mfi_attach(struct mfi_softc *sc) { uint32_t status; int error, commsz, framessz, sensesz; - int frames, unit, max_fw_sge; + int frames, unit, max_fw_sge, max_fw_cmds; uint32_t tb_mem_size = 0; if (sc == NULL) @@ -456,7 +463,14 @@ mfi_attach(struct mfi_softc *sc) * instead of compile time. 
*/ status = sc->mfi_read_fw_status(sc); - sc->mfi_max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK; + max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK; + if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) { + device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n", + max_fw_cmds, mfi_max_cmds); + sc->mfi_max_fw_cmds = mfi_max_cmds; + } else { + sc->mfi_max_fw_cmds = max_fw_cmds; + } max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16; sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1)); @@ -464,7 +478,8 @@ mfi_attach(struct mfi_softc *sc) if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mfi_tbolt_init_globals(sc); - device_printf(sc->mfi_dev, "MaxCmd = %x MaxSgl = %x state = %x \n", + device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, " + "MaxSgl = %d, state = %#x\n", max_fw_cmds, sc->mfi_max_fw_cmds, sc->mfi_max_sge, status); tb_mem_size = mfi_tbolt_get_memory_requirement(sc); @@ -503,8 +518,8 @@ mfi_attach(struct mfi_softc *sc) 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_init_dmat)) { - device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n"); - return (ENOMEM); + device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n"); + return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init, BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) { @@ -683,11 +698,14 @@ mfi_attach(struct mfi_softc *sc) /* ThunderBolt MFI_IOC2 INIT */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_disable_intr(sc); + mtx_lock(&sc->mfi_io_lock); if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { device_printf(sc->mfi_dev, "TB Init has failed with error %d\n",error); + mtx_unlock(&sc->mfi_io_lock); return error; } + mtx_unlock(&sc->mfi_io_lock); if ((error = mfi_tbolt_alloc_cmd(sc)) != 0) return error; @@ -723,10 +741,12 @@ mfi_attach(struct mfi_softc *sc) "hook\n"); return (EINVAL); } + mtx_lock(&sc->mfi_io_lock); if ((error = mfi_aen_setup(sc, 0), 0) != 0) { mtx_unlock(&sc->mfi_io_lock); return (error); } + 
mtx_unlock(&sc->mfi_io_lock); /* * Register a shutdown handler. @@ -766,7 +786,9 @@ mfi_attach(struct mfi_softc *sc) mfi_timeout, sc); if (sc->mfi_flags & MFI_FLAGS_TBOLT) { + mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); + mtx_unlock(&sc->mfi_io_lock); } return (0); @@ -776,21 +798,16 @@ static int mfi_alloc_commands(struct mfi_softc *sc) { struct mfi_command *cm; - int i, ncmds; + int i, j; /* * XXX Should we allocate all the commands up front, or allocate on * demand later like 'aac' does? */ - ncmds = MIN(mfi_max_cmds, sc->mfi_max_fw_cmds); - if (bootverbose) - device_printf(sc->mfi_dev, "Max fw cmds= %d, sizing driver " - "pool to %d\n", sc->mfi_max_fw_cmds, ncmds); - - sc->mfi_commands = malloc(sizeof(struct mfi_command) * ncmds, M_MFIBUF, - M_WAITOK | M_ZERO); + sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) * + sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO); - for (i = 0; i < ncmds; i++) { + for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames + sc->mfi_cmd_size * i); @@ -806,10 +823,20 @@ mfi_alloc_commands(struct mfi_softc *sc) mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); + } else { + device_printf(sc->mfi_dev, "Failed to allocate %d " + "command blocks, only allocated %d\n", + sc->mfi_max_fw_cmds, i - 1); + for (j = 0; j < i; j++) { + cm = &sc->mfi_commands[i]; + bus_dmamap_destroy(sc->mfi_buffer_dmat, + cm->cm_dmamap); + } + free(sc->mfi_commands, M_MFIBUF); + sc->mfi_commands = NULL; + + return (ENOMEM); } - else - break; - sc->mfi_total_cmds++; } return (0); @@ -834,6 +861,29 @@ mfi_release_command(struct mfi_command *cm) cm->cm_sg->sg32[0].addr = 0; } + /* + * Command may be on other queues e.g. 
busy queue depending on the + * flow of a previous call to mfi_mapcmd, so ensure its dequeued + * properly + */ + if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) + mfi_remove_busy(cm); + if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0) + mfi_remove_ready(cm); + + /* We're not expecting it to be on any other queue but check */ + if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { + panic("Command %p is still on another queue, flags = %#x", + cm, cm->cm_flags); + } + + /* tbolt cleanup */ + if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { + mfi_tbolt_return_cmd(cm->cm_sc, + cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1], + cm); + } + hdr_data = (uint32_t *)cm->cm_frame; hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */ hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */ @@ -916,8 +966,10 @@ mfi_comms_init(struct mfi_softc *sc) uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); - if ((cm = mfi_dequeue_free(sc)) == NULL) + if ((cm = mfi_dequeue_free(sc)) == NULL) { + mtx_unlock(&sc->mfi_io_lock); return (EBUSY); + } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; @@ -946,15 +998,12 @@ mfi_comms_init(struct mfi_softc *sc) cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; - if ((error = mfi_mapcmd(sc, cm)) != 0) { + if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to send init command\n"); - mtx_unlock(&sc->mfi_io_lock); - return (error); - } mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); - return (0); + return (error); } static int @@ -1005,7 +1054,7 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state) struct mfi_command *cm = NULL; int error; - mtx_lock(&sc->mfi_io_lock); + mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO, (void **)log_state, sizeof(**log_state)); if (error) @@ -1024,7 +1073,6 @@ mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state) out: if (cm) mfi_release_command(cm); - 
mtx_unlock(&sc->mfi_io_lock); return (error); } @@ -1037,32 +1085,32 @@ mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start) int error = 0; uint32_t seq; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; if (seq_start == 0) { - error = mfi_get_log_state(sc, &log_state); + if ((error = mfi_get_log_state(sc, &log_state)) != 0) + goto out; sc->mfi_boot_seq_num = log_state->boot_seq_num; - if (error) { - if (log_state) - free(log_state, M_MFIBUF); - return (error); - } /* * Walk through any events that fired since the last * shutdown. */ - mfi_parse_entries(sc, log_state->shutdown_seq_num, - log_state->newest_seq_num); + if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num, + log_state->newest_seq_num)) != 0) + goto out; seq = log_state->newest_seq_num; } else seq = seq_start; - mfi_aen_register(sc, seq, class_locale.word); + error = mfi_aen_register(sc, seq, class_locale.word); +out: free(log_state, M_MFIBUF); - return 0; + return (error); } int @@ -1072,7 +1120,6 @@ mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm) mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm->cm_complete = NULL; - /* * MegaCli can issue a DCMD of 0. 
In this case do nothing * and return 0 to it as status @@ -1100,12 +1147,13 @@ mfi_free(struct mfi_softc *sc) if (sc->mfi_cdev != NULL) destroy_dev(sc->mfi_cdev); - if (sc->mfi_total_cmds != 0) { - for (i = 0; i < sc->mfi_total_cmds; i++) { + if (sc->mfi_commands != NULL) { + for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); + sc->mfi_commands = NULL; } if (sc->mfi_intr) @@ -1161,7 +1209,8 @@ mfi_free(struct mfi_softc *sc) /* End LSIP200113393 */ /* ThunderBolt INIT packet memory Free */ if (sc->mfi_tb_init_busaddr != 0) - bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap); + bus_dmamap_unload(sc->mfi_tb_init_dmat, + sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init != NULL) bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init, sc->mfi_tb_init_dmamap); @@ -1178,16 +1227,14 @@ mfi_free(struct mfi_softc *sc) sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat); - for (int i = 0; i < sc->mfi_max_fw_cmds; i++) { - if (sc->mfi_cmd_pool_tbolt != NULL) { + if (sc->mfi_cmd_pool_tbolt != NULL) { + for (int i = 0; i < sc->mfi_max_fw_cmds; i++) { if (sc->mfi_cmd_pool_tbolt[i] != NULL) { free(sc->mfi_cmd_pool_tbolt[i], M_MFIBUF); sc->mfi_cmd_pool_tbolt[i] = NULL; } } - } - if (sc->mfi_cmd_pool_tbolt != NULL) { free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; } @@ -1252,16 +1299,14 @@ restart: cm->cm_error = 0; mfi_complete(sc, cm); } - if (++ci == (sc->mfi_max_fw_cmds + 1)) { + if (++ci == (sc->mfi_max_fw_cmds + 1)) ci = 0; - } } sc->mfi_comms->hw_ci = ci; /* Give defered I/O a chance to run */ - if (sc->mfi_flags & MFI_FLAGS_QFRZN) - sc->mfi_flags &= ~MFI_FLAGS_QFRZN; + sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); @@ -1284,15 +1329,15 @@ mfi_shutdown(struct mfi_softc *sc) int error; - if (sc->mfi_aen_cm) + if (sc->mfi_aen_cm != NULL) { 
sc->cm_aen_abort = 1; - if (sc->mfi_aen_cm != NULL) mfi_abort(sc, &sc->mfi_aen_cm); + } - if (sc->mfi_map_sync_cm) + if (sc->mfi_map_sync_cm != NULL) { sc->cm_map_abort = 1; - if (sc->mfi_map_sync_cm != NULL) mfi_abort(sc, &sc->mfi_map_sync_cm); + } mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0); @@ -1306,9 +1351,8 @@ mfi_shutdown(struct mfi_softc *sc) cm->cm_flags = MFI_CMD_POLLED; cm->cm_data = NULL; - if ((error = mfi_mapcmd(sc, cm)) != 0) { + if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "Failed to shutdown controller\n"); - } mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); @@ -1374,8 +1418,10 @@ mfi_syspdprobe(struct mfi_softc *sc) TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) { found = 0; for (i = 0; i < pdlist->count; i++) { - if (syspd->pd_id == pdlist->addr[i].device_id) + if (syspd->pd_id == pdlist->addr[i].device_id) { found = 1; + break; + } } if (found == 0) { printf("DELETE\n"); @@ -1628,6 +1674,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale) struct mfi_evt_detail *ed = NULL; int error = 0; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + current_aen.word = locale; if (sc->mfi_aen_cm != NULL) { prior_aen.word = @@ -1646,13 +1694,10 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale) } } - mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT, (void **)&ed, sizeof(*ed)); - mtx_unlock(&sc->mfi_io_lock); - if (error) { + if (error) goto out; - } dcmd = &cm->cm_frame->dcmd; ((uint32_t *)&dcmd->mbox)[0] = seq; @@ -1663,10 +1708,8 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale) sc->last_seq_num = seq; sc->mfi_aen_cm = cm; - mtx_lock(&sc->mfi_io_lock); mfi_enqueue_ready(cm); mfi_startio(sc); - mtx_unlock(&sc->mfi_io_lock); out: return (error); @@ -1684,11 +1727,11 @@ mfi_aen_complete(struct mfi_command *cm) sc = cm->cm_sc; mtx_assert(&sc->mfi_io_lock, MA_OWNED); - hdr = &cm->cm_frame->header; - if 
(sc->mfi_aen_cm == NULL) return; + hdr = &cm->cm_frame->header; + if (sc->cm_aen_abort || hdr->cmd_status == MFI_STAT_INVALID_STATUS) { sc->cm_aen_abort = 0; @@ -1714,16 +1757,13 @@ mfi_aen_complete(struct mfi_command *cm) } free(cm->cm_data, M_MFIBUF); - sc->mfi_aen_cm = NULL; wakeup(&sc->mfi_aen_cm); + sc->mfi_aen_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ - if (!aborted) { - mtx_unlock(&sc->mfi_io_lock); + if (!aborted) mfi_aen_setup(sc, seq); - mtx_lock(&sc->mfi_io_lock); - } } #define MAX_EVENTS 15 @@ -1737,6 +1777,8 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) union mfi_evt class_locale; int error, i, seq, size; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; @@ -1748,13 +1790,10 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) return (ENOMEM); for (seq = start_seq;;) { - mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { free(el, M_MFIBUF); - mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } - mtx_unlock(&sc->mfi_io_lock); dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); @@ -1770,38 +1809,30 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) cm->cm_data = el; cm->cm_len = size; - mtx_lock(&sc->mfi_io_lock); if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller entries\n"); mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); break; } - mtx_unlock(&sc->mfi_io_lock); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) { - mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); break; } if (dcmd->header.cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "Error %d fetching 
controller entries\n", dcmd->header.cmd_status); - mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); + error = EIO; break; } - mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); for (i = 0; i < el->count; i++) { /* @@ -1817,15 +1848,13 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) else if (el->event[i].seq < start_seq) break; } - mtx_lock(&sc->mfi_io_lock); mfi_queue_evt(sc, &el->event[i]); - mtx_unlock(&sc->mfi_io_lock); } seq = el->event[el->count - 1].seq + 1; } free(el, M_MFIBUF); - return (0); + return (error); } static int @@ -1942,11 +1971,12 @@ static int mfi_add_sys_pd(struct mfi_softc *sc, int id) dcmd->mbox[0]=id; dcmd->header.scsi_status = 0; dcmd->header.pad0 = 0; - if (mfi_mapcmd(sc, cm) != 0) { + if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get physical drive info %d\n", id); free(pd_info, M_MFIBUF); - return (0); + mfi_release_command(cm); + return (error); } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); @@ -2096,6 +2126,8 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio) int flags = 0, blkcount = 0, readop; uint8_t cdb_len; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); @@ -2142,6 +2174,7 @@ mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio) cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = flags; + return (cm); } @@ -2154,6 +2187,8 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio) uint32_t blkcount; uint32_t context = 0; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); @@ -2195,6 +2230,7 @@ mfi_build_ldio(struct mfi_softc *sc, struct bio *bio) cm->cm_sg = &io->sgl; cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = flags; + return (cm); } @@ -2212,11 +2248,14 @@ mfi_bio_complete(struct mfi_command *cm) if ((hdr->cmd_status 
!= MFI_STAT_OK) || (hdr->scsi_status != 0)) { bio->bio_flags |= BIO_ERROR; bio->bio_error = EIO; - device_printf(sc->mfi_dev, "I/O error, status= %d " - "scsi_status= %d\n", hdr->cmd_status, hdr->scsi_status); + device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, " + "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status); mfi_print_sense(cm->cm_sc, cm->cm_sense); } else if (cm->cm_error != 0) { bio->bio_flags |= BIO_ERROR; + bio->bio_error = cm->cm_error; + device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n", + cm, cm->cm_error); } mfi_release_command(cm); @@ -2252,6 +2291,7 @@ mfi_startio(struct mfi_softc *sc) /* Send the command to the controller */ if (mfi_mapcmd(sc, cm) != 0) { + device_printf(sc->mfi_dev, "Failed to startio\n"); mfi_requeue_ready(cm); break; } @@ -2280,10 +2320,7 @@ mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm) return (0); } } else { - if (sc->MFA_enabled) - error = mfi_tbolt_send_frame(sc, cm); - else - error = mfi_send_frame(sc, cm); + error = mfi_send_frame(sc, cm); } return (error); @@ -2297,18 +2334,28 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) union mfi_sgl *sgl; struct mfi_softc *sc; int i, j, first, dir; - int sge_size; + int sge_size, locked; cm = (struct mfi_command *)arg; sc = cm->cm_sc; hdr = &cm->cm_frame->header; sgl = cm->cm_sg; + /* + * We need to check if we have the lock as this is async + * callback so even though our caller mfi_mapcmd asserts + * it has the lock, there is no garantee that hasn't been + * dropped if bus_dmamap_load returned prior to our + * completion. 
+ */ + if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0) + mtx_lock(&sc->mfi_io_lock); + if (error) { printf("error %d in callback\n", error); cm->cm_error = error; mfi_complete(sc, cm); - return; + goto out; } /* Use IEEE sgl only for IO's on a SKINNY controller * For other commands on a SKINNY controller use either @@ -2380,10 +2427,17 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs); cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; - if (sc->MFA_enabled) - mfi_tbolt_send_frame(sc, cm); - else - mfi_send_frame(sc, cm); + if ((error = mfi_send_frame(sc, cm)) != 0) { + printf("error %d in callback from mfi_send_frame\n", error); + cm->cm_error = error; + mfi_complete(sc, cm); + goto out; + } + +out: + /* leave the lock in the state we found it */ + if (locked == 0) + mtx_unlock(&sc->mfi_io_lock); return; } @@ -2391,8 +2445,26 @@ mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) static int mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { + int error; + + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + + if (sc->MFA_enabled) + error = mfi_tbolt_send_frame(sc, cm); + else + error = mfi_std_send_frame(sc, cm); + + if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) + mfi_remove_busy(cm); + + return (error); +} + +static int +mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm) +{ struct mfi_frame_header *hdr; - int tm = MFI_POLL_TIMEOUT_SECS * 1000; + int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; @@ -2446,6 +2518,7 @@ void mfi_complete(struct mfi_softc *sc, struct mfi_command *cm) { int dir; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) { dir = 0; @@ -2473,11 +2546,12 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) { struct mfi_command *cm; struct mfi_abort_frame *abort; - int i = 0; + int i = 0, error; uint32_t context = 0; 
mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { + mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } @@ -2497,7 +2571,8 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; - mfi_mapcmd(sc, cm); + if ((error = mfi_mapcmd(sc, cm)) != 0) + device_printf(sc->mfi_dev, "failed to abort command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); @@ -2513,7 +2588,7 @@ mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) mtx_unlock(&sc->mfi_io_lock); } - return (0); + return (error); } int @@ -2551,7 +2626,8 @@ mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT; - error = mfi_mapcmd(sc, cm); + if ((error = mfi_mapcmd(sc, cm)) != 0) + device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); @@ -2594,7 +2670,8 @@ mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI; - error = mfi_mapcmd(sc, cm); + if ((error = mfi_mapcmd(sc, cm)) != 0) + device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); @@ -3308,8 +3385,10 @@ out: } case MFI_SET_AEN: aen = (struct mfi_ioc_aen *)arg; + mtx_lock(&sc->mfi_io_lock); error = mfi_aen_register(sc, aen->aen_seq_num, aen->aen_class_locale); + mtx_unlock(&sc->mfi_io_lock); break; case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ @@ -3638,7 +3717,7 @@ mfi_dump_all(void) deadline = time_uptime - MFI_CMD_TIMEOUT; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) { - if (cm->cm_timestamp < deadline) { + if (cm->cm_timestamp <= 
deadline) { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp)); @@ -3649,7 +3728,7 @@ mfi_dump_all(void) #if 0 if (timedout) - MFI_DUMP_CMDS(SC); + MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); @@ -3662,7 +3741,7 @@ static void mfi_timeout(void *data) { struct mfi_softc *sc = (struct mfi_softc *)data; - struct mfi_command *cm; + struct mfi_command *cm, *tmp; time_t deadline; int timedout = 0; @@ -3674,10 +3753,10 @@ mfi_timeout(void *data) } } mtx_lock(&sc->mfi_io_lock); - TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) { + TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) { if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm) continue; - if (cm->cm_timestamp < deadline) { + if (cm->cm_timestamp <= deadline) { if (sc->adpreset != 0 && sc->issuepend_done == 0) { cm->cm_timestamp = time_uptime; } else { @@ -3687,6 +3766,13 @@ mfi_timeout(void *data) ); MFI_PRINT_CMD(cm); MFI_VALIDATE_CMD(sc, cm); + /* + * Fail the command instead of leaving it on + * the queue where it could remain stuck forever + */ + mfi_remove_busy(cm); + cm->cm_error = ETIMEDOUT; + mfi_complete(sc, cm); timedout++; } } @@ -3694,7 +3780,7 @@ mfi_timeout(void *data) #if 0 if (timedout) - MFI_DUMP_CMDS(SC); + MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); diff --git a/sys/dev/mfi/mfi_cam.c b/sys/dev/mfi/mfi_cam.c index 325b064..0ea2326 100644 --- a/sys/dev/mfi/mfi_cam.c +++ b/sys/dev/mfi/mfi_cam.c @@ -145,6 +145,7 @@ mfip_attach(device_t dev) MFI_SCSI_MAX_CMDS, sc->devq); if (sc->sim == NULL) { cam_simq_free(sc->devq); + sc->devq = NULL; device_printf(dev, "CAM SIM attach failed\n"); return (EINVAL); } @@ -155,7 +156,9 @@ mfip_attach(device_t dev) if (xpt_bus_register(sc->sim, dev, 0) != 0) { device_printf(dev, "XPT bus registration failed\n"); cam_sim_free(sc->sim, FALSE); + sc->sim = NULL; cam_simq_free(sc->devq); + sc->devq = NULL; mtx_unlock(&mfisc->mfi_io_lock); return (EINVAL); } @@ -187,11 +190,14 @@ 
mfip_detach(device_t dev) mtx_lock(&sc->mfi_sc->mfi_io_lock); xpt_bus_deregister(cam_sim_path(sc->sim)); cam_sim_free(sc->sim, FALSE); + sc->sim = NULL; mtx_unlock(&sc->mfi_sc->mfi_io_lock); } - if (sc->devq != NULL) + if (sc->devq != NULL) { cam_simq_free(sc->devq); + sc->devq = NULL; + } return (0); } diff --git a/sys/dev/mfi/mfi_debug.c b/sys/dev/mfi/mfi_debug.c index 2e66e19..4aec4f7 100644 --- a/sys/dev/mfi/mfi_debug.c +++ b/sys/dev/mfi/mfi_debug.c @@ -57,14 +57,7 @@ __FBSDID("$FreeBSD$"); static void mfi_print_frame_flags(device_t dev, uint32_t flags) { - device_printf(dev, "flags=%b\n", flags, - "\20" - "\1NOPOST" - "\2SGL64" - "\3SENSE64" - "\4WRITE" - "\5READ" - "\6IEEESGL"); + device_printf(dev, "flags=%b\n", flags, MFI_FRAME_FMT); } static void @@ -205,16 +198,7 @@ mfi_print_cmd(struct mfi_command *cm) device_printf(dev, "cm=%p index=%d total_frame_size=%d " "extra_frames=%d\n", cm, cm->cm_index, cm->cm_total_frame_size, cm->cm_extra_frames); - device_printf(dev, "flags=%b\n", cm->cm_flags, - "\20" - "\1MAPPED" - "\2DATAIN" - "\3DATAOUT" - "\4COMPLETED" - "\5POLLED" - "\6Q_FREE" - "\7Q_READY" - "\10Q_BUSY"); + device_printf(dev, "flags=%b\n", cm->cm_flags, MFI_CMD_FLAGS_FMT); switch (cm->cm_frame->header.cmd) { case MFI_CMD_DCMD: @@ -237,7 +221,7 @@ mfi_dump_cmds(struct mfi_softc *sc) { int i; - for (i = 0; i < sc->mfi_total_cmds; i++) + for (i = 0; i < sc->mfi_max_fw_cmds; i++) mfi_print_generic_frame(sc, &sc->mfi_commands[i]); } diff --git a/sys/dev/mfi/mfi_tbolt.c b/sys/dev/mfi/mfi_tbolt.c index cce63c0..9d29ea0 100644 --- a/sys/dev/mfi/mfi_tbolt.c +++ b/sys/dev/mfi/mfi_tbolt.c @@ -55,14 +55,12 @@ __FBSDID("$FreeBSD$"); #include <dev/mfi/mfi_ioctl.h> #include <dev/mfi/mfivar.h> -struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc); +struct mfi_cmd_tbolt *mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *); union mfi_mpi2_request_descriptor * mfi_tbolt_get_request_descriptor(struct mfi_softc *sc, uint16_t index); void 
mfi_tbolt_complete_cmd(struct mfi_softc *sc); int mfi_tbolt_build_io(struct mfi_softc *sc, struct mfi_command *mfi_cmd, struct mfi_cmd_tbolt *cmd); -static inline void mfi_tbolt_return_cmd(struct mfi_softc *sc, - struct mfi_cmd_tbolt *cmd); union mfi_mpi2_request_descriptor *mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd); uint8_t @@ -84,6 +82,15 @@ static void mfi_queue_map_sync(struct mfi_softc *sc); #define MFI_FUSION_ENABLE_INTERRUPT_MASK (0x00000008) + +extern int mfi_polled_cmd_timeout; +static int mfi_fw_reset_test = 0; +#ifdef MFI_DEBUG +TUNABLE_INT("hw.mfi.fw_reset_test", &mfi_fw_reset_test); +SYSCTL_INT(_hw_mfi, OID_AUTO, fw_reset_test, CTLFLAG_RWTUN, &mfi_fw_reset_test, + 0, "Force a firmware reset condition"); +#endif + void mfi_tbolt_enable_intr_ppc(struct mfi_softc *sc) { @@ -162,14 +169,14 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc) while (!( HostDiag & DIAG_WRITE_ENABLE)) { for (i = 0; i < 1000; i++); HostDiag = (uint32_t)MFI_READ4(sc, MFI_HDR); - device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, " - "hostdiag=%x\n", retry, HostDiag); + device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, " + "hostdiag=%#x\n", retry, HostDiag); if (retry++ >= 100) return 1; } - device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%x\n", HostDiag); + device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: HostDiag=%#x\n", HostDiag); MFI_WRITE4(sc, MFI_HDR, (HostDiag | DIAG_RESET_ADAPTER)); @@ -181,8 +188,8 @@ mfi_tbolt_adp_reset(struct mfi_softc *sc) while (HostDiag & DIAG_RESET_ADAPTER) { for (i = 0; i < 1000; i++) ; HostDiag = (uint32_t)MFI_READ4(sc, MFI_RSR); - device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%x, " - "hostdiag=%x\n", retry, HostDiag); + device_printf(sc->mfi_dev, "ADP_RESET_TBOLT: retry time=%d, " + "hostdiag=%#x\n", retry, HostDiag); if (retry++ >= 1000) return 1; @@ -311,6 +318,8 @@ mfi_tbolt_init_desc_pool(struct mfi_softc *sc, uint8_t* mem_location, sc->sg_frame_busaddr = sc->reply_frame_busaddr + offset; 
/* initialize the last_reply_idx to 0 */ sc->last_reply_idx = 0; + MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1); + MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); offset = (sc->sg_frame_busaddr + (MEGASAS_MAX_SZ_CHAIN_FRAME * sc->mfi_max_fw_cmds)) - sc->mfi_tb_busaddr; if (offset > tbolt_contg_length) @@ -327,30 +336,35 @@ int mfi_tbolt_init_MFI_queue(struct mfi_softc *sc) { struct MPI2_IOC_INIT_REQUEST *mpi2IocInit; - struct mfi_init_frame *mfi_init; + struct mfi_init_frame *mfi_init; uintptr_t offset = 0; bus_addr_t phyAddress; MFI_ADDRESS *mfiAddressTemp; - struct mfi_command *cm; + struct mfi_command *cm, cmd_tmp; int error; - mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + /* Check if initialization is already completed */ if (sc->MFA_enabled) { + device_printf(sc->mfi_dev, "tbolt_init already initialised!\n"); return 1; } - mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { - mtx_unlock(&sc->mfi_io_lock); + device_printf(sc->mfi_dev, "tbolt_init failed to get command " + " entry!\n"); return (EBUSY); } + + cmd_tmp.cm_frame = cm->cm_frame; + cmd_tmp.cm_frame_busaddr = cm->cm_frame_busaddr; + cmd_tmp.cm_dmamap = cm->cm_dmamap; + cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_tb_init); cm->cm_frame_busaddr = sc->mfi_tb_init_busaddr; cm->cm_dmamap = sc->mfi_tb_init_dmamap; cm->cm_frame->header.context = 0; - cm->cm_sc = sc; - cm->cm_index = 0; /* * Abuse the SG list area of the frame to hold the init_qinfo @@ -358,6 +372,7 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc) */ mfi_init = &cm->cm_frame->init; + mpi2IocInit = (struct MPI2_IOC_INIT_REQUEST *)sc->mfi_tb_ioc_init_desc; bzero(mpi2IocInit, sizeof(struct MPI2_IOC_INIT_REQUEST)); mpi2IocInit->Function = MPI2_FUNCTION_IOC_INIT; mpi2IocInit->WhoInit = MPI2_WHOINIT_HOST_DRIVER; @@ -411,23 +426,25 @@ mfi_tbolt_init_MFI_queue(struct mfi_softc *sc) if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, 
"failed to send IOC init2 " "command %d at %lx\n", error, (long)cm->cm_frame_busaddr); - mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); - return (error); + goto out; } - mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); - if (mfi_init->header.cmd_status == 0) { + if (mfi_init->header.cmd_status == MFI_STAT_OK) { sc->MFA_enabled = 1; - } - else { - device_printf(sc->mfi_dev, "Init command Failed %x\n", + } else { + device_printf(sc->mfi_dev, "Init command Failed %#x\n", mfi_init->header.cmd_status); - return 1; + error = mfi_init->header.cmd_status; + goto out; } - return 0; +out: + cm->cm_frame = cmd_tmp.cm_frame; + cm->cm_frame_busaddr = cmd_tmp.cm_frame_busaddr; + cm->cm_dmamap = cmd_tmp.cm_dmamap; + mfi_release_command(cm); + + return (error); } @@ -447,13 +464,21 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc) sc->request_desc_pool = malloc(sizeof( union mfi_mpi2_request_descriptor) * sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO); + + if (sc->request_desc_pool == NULL) { + device_printf(sc->mfi_dev, "Could not alloc " + "memory for request_desc_pool\n"); + return (ENOMEM); + } + sc->mfi_cmd_pool_tbolt = malloc(sizeof(struct mfi_cmd_tbolt*) * sc->mfi_max_fw_cmds, M_MFIBUF, M_NOWAIT|M_ZERO); - if (!sc->mfi_cmd_pool_tbolt) { - device_printf(sc->mfi_dev, "out of memory. 
Could not alloc " - "memory for cmd_list_fusion\n"); - return 1; + if (sc->mfi_cmd_pool_tbolt == NULL) { + free(sc->request_desc_pool, M_MFIBUF); + device_printf(sc->mfi_dev, "Could not alloc " + "memory for cmd_pool_tbolt\n"); + return (ENOMEM); } for (i = 0; i < sc->mfi_max_fw_cmds; i++) { @@ -461,20 +486,24 @@ mfi_tbolt_alloc_cmd(struct mfi_softc *sc) struct mfi_cmd_tbolt),M_MFIBUF, M_NOWAIT|M_ZERO); if (!sc->mfi_cmd_pool_tbolt[i]) { - device_printf(sc->mfi_dev, "Could not alloc cmd list " - "fusion\n"); + device_printf(sc->mfi_dev, "Could not alloc " + "cmd_pool_tbolt entry\n"); for (j = 0; j < i; j++) free(sc->mfi_cmd_pool_tbolt[j], M_MFIBUF); + free(sc->request_desc_pool, M_MFIBUF); + sc->request_desc_pool = NULL; free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; + + return (ENOMEM); } } /* * The first 256 bytes (SMID 0) is not used. Don't add to the cmd - *list + * list */ io_req_base = sc->request_message_pool_align + MEGASAS_THUNDERBOLT_NEW_MSG_SIZE; @@ -520,7 +549,8 @@ mfi_tbolt_reset(struct mfi_softc *sc) if (sc->mfi_flags & MFI_FLAGS_TBOLT) { fw_state = sc->mfi_read_fw_status(sc); - if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT) { + if ((fw_state & MFI_FWSTATE_FAULT) == MFI_FWSTATE_FAULT || + mfi_fw_reset_test) { if ((sc->disableOnlineCtrlReset == 0) && (sc->adpreset == 0)) { device_printf(sc->mfi_dev, "Adapter RESET " @@ -554,8 +584,7 @@ mfi_intr_tbolt(void *arg) return; mtx_lock(&sc->mfi_io_lock); mfi_tbolt_complete_cmd(sc); - if (sc->mfi_flags & MFI_FLAGS_QFRZN) - sc->mfi_flags &= ~MFI_FLAGS_QFRZN; + sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); return; @@ -573,58 +602,63 @@ map_tbolt_cmd_status(struct mfi_command *mfi_cmd, uint8_t status, uint8_t ext_status) { switch (status) { - case MFI_STAT_OK: - mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK; - mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK; - mfi_cmd->cm_error = MFI_STAT_OK; - break; - - case 
MFI_STAT_SCSI_IO_FAILED: - case MFI_STAT_LD_INIT_IN_PROGRESS: - mfi_cmd->cm_frame->header.cmd_status = status; - mfi_cmd->cm_frame->header.scsi_status = ext_status; - mfi_cmd->cm_frame->dcmd.header.cmd_status = status; - mfi_cmd->cm_frame->dcmd.header.scsi_status - = ext_status; - break; - - case MFI_STAT_SCSI_DONE_WITH_ERROR: - mfi_cmd->cm_frame->header.cmd_status = ext_status; - mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status; - break; - - case MFI_STAT_LD_OFFLINE: - case MFI_STAT_DEVICE_NOT_FOUND: - mfi_cmd->cm_frame->header.cmd_status = status; - mfi_cmd->cm_frame->dcmd.header.cmd_status = status; - break; - - default: - mfi_cmd->cm_frame->header.cmd_status = status; - mfi_cmd->cm_frame->dcmd.header.cmd_status = status; - break; - } + case MFI_STAT_OK: + mfi_cmd->cm_frame->header.cmd_status = MFI_STAT_OK; + mfi_cmd->cm_frame->dcmd.header.cmd_status = MFI_STAT_OK; + mfi_cmd->cm_error = MFI_STAT_OK; + break; + + case MFI_STAT_SCSI_IO_FAILED: + case MFI_STAT_LD_INIT_IN_PROGRESS: + mfi_cmd->cm_frame->header.cmd_status = status; + mfi_cmd->cm_frame->header.scsi_status = ext_status; + mfi_cmd->cm_frame->dcmd.header.cmd_status = status; + mfi_cmd->cm_frame->dcmd.header.scsi_status + = ext_status; + break; + + case MFI_STAT_SCSI_DONE_WITH_ERROR: + mfi_cmd->cm_frame->header.cmd_status = ext_status; + mfi_cmd->cm_frame->dcmd.header.cmd_status = ext_status; + break; + + case MFI_STAT_LD_OFFLINE: + case MFI_STAT_DEVICE_NOT_FOUND: + mfi_cmd->cm_frame->header.cmd_status = status; + mfi_cmd->cm_frame->dcmd.header.cmd_status = status; + break; + + default: + mfi_cmd->cm_frame->header.cmd_status = status; + mfi_cmd->cm_frame->dcmd.header.cmd_status = status; + break; + } } /* * mfi_tbolt_return_cmd - Return a cmd to free command pool * @instance: Adapter soft state - * @cmd: Command packet to be returned to free command pool + * @tbolt_cmd: Tbolt command packet to be returned to free command pool + * @mfi_cmd: Owning MFI command packet */ -static inline void 
-mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *cmd) +void +mfi_tbolt_return_cmd(struct mfi_softc *sc, struct mfi_cmd_tbolt *tbolt_cmd, + struct mfi_command *mfi_cmd) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); - cmd->sync_cmd_idx = sc->mfi_max_fw_cmds; - TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, cmd, next); + mfi_cmd->cm_flags &= ~MFI_CMD_TBOLT; + mfi_cmd->cm_extra_frames = 0; + tbolt_cmd->sync_cmd_idx = sc->mfi_max_fw_cmds; + + TAILQ_INSERT_TAIL(&sc->mfi_cmd_tbolt_tqh, tbolt_cmd, next); } void mfi_tbolt_complete_cmd(struct mfi_softc *sc) { struct mfi_mpi2_reply_header *desc, *reply_desc; - struct mfi_command *cmd_mfi, *cmd_mfi_check; /* For MFA Cmds */ + struct mfi_command *cmd_mfi; /* For MFA Cmds */ struct mfi_cmd_tbolt *cmd_tbolt; uint16_t smid; uint8_t reply_descript_type; @@ -632,14 +666,17 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc) uint32_t status, extStatus; uint16_t num_completed; union desc_value val; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); desc = (struct mfi_mpi2_reply_header *) ((uintptr_t)sc->reply_frame_pool_align + sc->last_reply_idx * sc->reply_size); reply_desc = desc; - if (!reply_desc) + if (reply_desc == NULL) { device_printf(sc->mfi_dev, "reply desc is NULL!!\n"); + return; + } reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; @@ -652,13 +689,18 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc) /* Read Reply descriptor */ while ((val.u.low != 0xFFFFFFFF) && (val.u.high != 0xFFFFFFFF)) { smid = reply_desc->SMID; - if (!smid || smid > sc->mfi_max_fw_cmds + 1) { - device_printf(sc->mfi_dev, "smid is %x. Cannot " - "proceed. 
Returning \n", smid); - return; + if (smid == 0 || smid > sc->mfi_max_fw_cmds) { + device_printf(sc->mfi_dev, "smid is %d cannot " + "proceed - skipping\n", smid); + goto next; } - cmd_tbolt = sc->mfi_cmd_pool_tbolt[smid - 1]; + if (cmd_tbolt->sync_cmd_idx == sc->mfi_max_fw_cmds) { + device_printf(sc->mfi_dev, "cmd_tbolt %p " + "has invalid sync_cmd_idx=%d - skipping\n", + cmd_tbolt, cmd_tbolt->sync_cmd_idx); + goto next; + } cmd_mfi = &sc->mfi_commands[cmd_tbolt->sync_cmd_idx]; scsi_io_req = cmd_tbolt->io_request; @@ -666,33 +708,30 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc) extStatus = cmd_mfi->cm_frame->dcmd.header.scsi_status; map_tbolt_cmd_status(cmd_mfi, status, extStatus); - if (cmd_mfi->cm_flags & MFI_CMD_SCSI && + /* mfi_tbolt_return_cmd is handled by mfi complete / return */ + if ((cmd_mfi->cm_flags & MFI_CMD_SCSI) != 0 && (cmd_mfi->cm_flags & MFI_CMD_POLLED) != 0) { /* polled LD/SYSPD IO command */ - mfi_tbolt_return_cmd(sc, cmd_tbolt); /* XXX mark okay for now DJA */ cmd_mfi->cm_frame->header.cmd_status = MFI_STAT_OK; - } else { + } else { /* remove command from busy queue if not polled */ - TAILQ_FOREACH(cmd_mfi_check, &sc->mfi_busy, cm_link) { - if (cmd_mfi_check == cmd_mfi) { - mfi_remove_busy(cmd_mfi); - break; - } - } + if ((cmd_mfi->cm_flags & MFI_ON_MFIQ_BUSY) != 0) + mfi_remove_busy(cmd_mfi); /* complete the command */ mfi_complete(sc, cmd_mfi); - mfi_tbolt_return_cmd(sc, cmd_tbolt); } +next: sc->last_reply_idx++; if (sc->last_reply_idx >= sc->mfi_max_fw_cmds) { MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); sc->last_reply_idx = 0; } - /*set it back to all 0xfff.*/ + + /* Set it back to all 0xfff */ ((union mfi_mpi2_reply_descriptor*)desc)->words = ~((uint64_t)0x00); @@ -728,17 +767,23 @@ mfi_tbolt_complete_cmd(struct mfi_softc *sc) */ struct mfi_cmd_tbolt * -mfi_tbolt_get_cmd(struct mfi_softc *sc) +mfi_tbolt_get_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd) { struct mfi_cmd_tbolt *cmd = NULL; mtx_assert(&sc->mfi_io_lock, MA_OWNED); 
- cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh); + if ((cmd = TAILQ_FIRST(&sc->mfi_cmd_tbolt_tqh)) == NULL) + return (NULL); TAILQ_REMOVE(&sc->mfi_cmd_tbolt_tqh, cmd, next); memset((uint8_t *)cmd->sg_frame, 0, MEGASAS_MAX_SZ_CHAIN_FRAME); memset((uint8_t *)cmd->io_request, 0, MEGASAS_THUNDERBOLT_NEW_MSG_SIZE); + + cmd->sync_cmd_idx = mfi_cmd->cm_index; + mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */ + mfi_cmd->cm_flags |= MFI_CMD_TBOLT; + return cmd; } @@ -767,11 +812,9 @@ mfi_build_mpt_pass_thru(struct mfi_softc *sc, struct mfi_command *mfi_cmd) struct mfi_mpi2_request_raid_scsi_io *io_req; struct mfi_cmd_tbolt *cmd; - cmd = mfi_tbolt_get_cmd(sc); + cmd = mfi_tbolt_get_cmd(sc, mfi_cmd); if (!cmd) return EBUSY; - mfi_cmd->cm_extra_frames = cmd->index; /* Frame count used as SMID */ - cmd->sync_cmd_idx = mfi_cmd->cm_index; io_req = cmd->io_request; mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL.IeeeChain; @@ -980,16 +1023,21 @@ mfi_build_and_issue_cmd(struct mfi_softc *sc, struct mfi_command *mfi_cmd) struct mfi_cmd_tbolt *cmd; union mfi_mpi2_request_descriptor *req_desc = NULL; uint16_t index; - cmd = mfi_tbolt_get_cmd(sc); - if (!cmd) - return NULL; - mfi_cmd->cm_extra_frames = cmd->index; - cmd->sync_cmd_idx = mfi_cmd->cm_index; + cmd = mfi_tbolt_get_cmd(sc, mfi_cmd); + if (cmd == NULL) + return (NULL); index = cmd->index; req_desc = mfi_tbolt_get_request_descriptor(sc, index-1); - if (mfi_tbolt_build_io(sc, mfi_cmd, cmd)) - return NULL; + if (req_desc == NULL) { + mfi_tbolt_return_cmd(sc, cmd, mfi_cmd); + return (NULL); + } + + if (mfi_tbolt_build_io(sc, mfi_cmd, cmd) != 0) { + mfi_tbolt_return_cmd(sc, cmd, mfi_cmd); + return (NULL); + } req_desc->header.SMID = index; return req_desc; } @@ -1008,7 +1056,7 @@ mfi_tbolt_build_mpt_cmd(struct mfi_softc *sc, struct mfi_command *cmd) index = cmd->cm_extra_frames; req_desc = mfi_tbolt_get_request_descriptor(sc, index - 1); - if (!req_desc) + if (req_desc == NULL) return NULL; 
bzero(req_desc, sizeof(*req_desc)); @@ -1024,7 +1072,7 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm) struct mfi_frame_header *hdr; uint8_t *cdb; union mfi_mpi2_request_descriptor *req_desc = NULL; - int tm = MFI_POLL_TIMEOUT_SECS * 1000; + int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; cdb = cm->cm_frame->pass.cdb; @@ -1058,9 +1106,8 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm) return 1; } } else if ((req_desc = mfi_tbolt_build_mpt_cmd(sc, cm)) == NULL) { - device_printf(sc->mfi_dev, "Mapping from MFI to MPT " - "Failed\n"); - return 1; + device_printf(sc->mfi_dev, "Mapping from MFI to MPT Failed\n"); + return (1); } if (cm->cm_flags & MFI_CMD_SCSI) { @@ -1078,23 +1125,30 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm) if ((cm->cm_flags & MFI_CMD_POLLED) == 0) return 0; - if (cm->cm_flags & MFI_CMD_SCSI) { - /* check reply queue */ - mfi_tbolt_complete_cmd(sc); - } - - /* This is a polled command, so busy-wait for it to complete. */ + /* + * This is a polled command, so busy-wait for it to complete. + * + * The value of hdr->cmd_status is updated directly by the hardware + * so there is no guarantee that mfi_tbolt_complete_cmd is called + * prior to this value changing. + */ while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { DELAY(1000); tm -= 1; if (tm <= 0) break; if (cm->cm_flags & MFI_CMD_SCSI) { - /* check reply queue */ + /* + * Force check reply queue. 
+ * This ensures that dump works correctly + */ mfi_tbolt_complete_cmd(sc); } } + /* ensure the command cleanup has been processed before returning */ + mfi_tbolt_complete_cmd(sc); + if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { device_printf(sc->mfi_dev, "Frame %p timed out " "command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode); @@ -1104,9 +1158,10 @@ mfi_tbolt_send_frame(struct mfi_softc *sc, struct mfi_command *cm) } static void -mfi_issue_pending_cmds_again (struct mfi_softc *sc) +mfi_issue_pending_cmds_again(struct mfi_softc *sc) { struct mfi_command *cm, *tmp; + struct mfi_cmd_tbolt *cmd; mtx_assert(&sc->mfi_io_lock, MA_OWNED); TAILQ_FOREACH_REVERSE_SAFE(cm, &sc->mfi_busy, BUSYQ, cm_link, tmp) { @@ -1119,50 +1174,51 @@ mfi_issue_pending_cmds_again (struct mfi_softc *sc) * should be performed on the controller */ if (cm->retry_for_fw_reset == 3) { - device_printf(sc->mfi_dev, "megaraid_sas: command %d " - "was tried multiple times during adapter reset" - "Shutting down the HBA\n", cm->cm_index); + device_printf(sc->mfi_dev, "megaraid_sas: command %p " + "index=%d was tried multiple times during adapter " + "reset - Shutting down the HBA\n", cm, cm->cm_index); mfi_kill_hba(sc); sc->hw_crit_error = 1; return; } - if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) { - struct mfi_cmd_tbolt *cmd; - mfi_remove_busy(cm); - cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - - 1 ]; - mfi_tbolt_return_cmd(sc, cmd); - if ((cm->cm_flags & MFI_ON_MFIQ_MASK) == 0) { - if (cm->cm_frame->dcmd.opcode != - MFI_DCMD_CTRL_EVENT_WAIT) { - device_printf(sc->mfi_dev, - "APJ ****requeue command %d \n", - cm->cm_index); - mfi_requeue_ready(cm); - } + mfi_remove_busy(cm); + if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { + if (cm->cm_extra_frames != 0 && cm->cm_extra_frames <= + sc->mfi_max_fw_cmds) { + cmd = sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1]; + mfi_tbolt_return_cmd(sc, cmd, cm); + } else { + device_printf(sc->mfi_dev, + "Invalid extra_frames: %d detected\n", + cm->cm_extra_frames); 
} - else - mfi_release_command(cm); } + + if (cm->cm_frame->dcmd.opcode != MFI_DCMD_CTRL_EVENT_WAIT) { + device_printf(sc->mfi_dev, + "APJ ****requeue command %p index=%d\n", + cm, cm->cm_index); + mfi_requeue_ready(cm); + } else + mfi_release_command(cm); } mfi_startio(sc); } static void -mfi_kill_hba (struct mfi_softc *sc) +mfi_kill_hba(struct mfi_softc *sc) { if (sc->mfi_flags & MFI_FLAGS_TBOLT) - MFI_WRITE4 (sc, 0x00,MFI_STOP_ADP); + MFI_WRITE4(sc, 0x00, MFI_STOP_ADP); else - MFI_WRITE4 (sc, MFI_IDB,MFI_STOP_ADP); + MFI_WRITE4(sc, MFI_IDB, MFI_STOP_ADP); } static void mfi_process_fw_state_chg_isr(void *arg) { struct mfi_softc *sc= (struct mfi_softc *)arg; - struct mfi_cmd_tbolt *cmd; int error, status; if (sc->adpreset == 1) { @@ -1191,26 +1247,32 @@ mfi_process_fw_state_chg_isr(void *arg) device_printf(sc->mfi_dev, "controller is not in " "ready state\n"); mfi_kill_hba(sc); - sc->hw_crit_error= 1; - return ; + sc->hw_crit_error = 1; + return; + } + if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { + device_printf(sc->mfi_dev, "Failed to initialise MFI " + "queue\n"); + mfi_kill_hba(sc); + sc->hw_crit_error = 1; + return; } - if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) - return; - mtx_lock(&sc->mfi_io_lock); + /* Init last reply index and max */ + MFI_WRITE4(sc, MFI_RFPI, sc->mfi_max_fw_cmds - 1); + MFI_WRITE4(sc, MFI_RPI, sc->last_reply_idx); sc->mfi_enable_intr(sc); sc->adpreset = 0; - free(sc->mfi_aen_cm->cm_data, M_MFIBUF); - mfi_remove_busy(sc->mfi_aen_cm); - cmd = sc->mfi_cmd_pool_tbolt[sc->mfi_aen_cm->cm_extra_frames - - 1]; - mfi_tbolt_return_cmd(sc, cmd); - if (sc->mfi_aen_cm) { + if (sc->mfi_aen_cm != NULL) { + free(sc->mfi_aen_cm->cm_data, M_MFIBUF); + mfi_remove_busy(sc->mfi_aen_cm); mfi_release_command(sc->mfi_aen_cm); sc->mfi_aen_cm = NULL; } - if (sc->mfi_map_sync_cm) { + + if (sc->mfi_map_sync_cm != NULL) { + mfi_remove_busy(sc->mfi_map_sync_cm); mfi_release_command(sc->mfi_map_sync_cm); sc->mfi_map_sync_cm = NULL; } @@ -1223,9 +1285,12 @@ 
mfi_process_fw_state_chg_isr(void *arg) */ if (!sc->hw_crit_error) { /* - * Initiate AEN (Asynchronous Event Notification) + * Initiate AEN (Asynchronous Event Notification) & + * Sync Map */ mfi_aen_setup(sc, sc->last_seq_num); + mfi_tbolt_sync_map_info(sc); + sc->issuepend_done = 1; device_printf(sc->mfi_dev, "second stage of reset " "complete, FW is ready now.\n"); @@ -1237,7 +1302,6 @@ mfi_process_fw_state_chg_isr(void *arg) device_printf(sc->mfi_dev, "mfi_process_fw_state_chg_isr " "called with unhandled value:%d\n", sc->adpreset); } - mtx_unlock(&sc->mfi_io_lock); } /* @@ -1276,25 +1340,27 @@ void mfi_tbolt_sync_map_info(struct mfi_softc *sc) { int error = 0, i; - struct mfi_command *cmd; - struct mfi_dcmd_frame *dcmd; + struct mfi_command *cmd = NULL; + struct mfi_dcmd_frame *dcmd = NULL; uint32_t context = 0; - union mfi_ld_ref *ld_sync; + union mfi_ld_ref *ld_sync = NULL; size_t ld_size; struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_ld_list *list = NULL; + mtx_assert(&sc->mfi_io_lock, MA_OWNED); + if (sc->mfi_map_sync_cm != NULL || sc->cm_map_abort) return; - mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST, (void **)&list, sizeof(*list)); if (error) goto out; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAIN; + if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get device listing\n"); goto out; @@ -1308,18 +1374,15 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc) } ld_size = sizeof(*ld_sync) * list->ld_count; - mtx_unlock(&sc->mfi_io_lock); ld_sync = (union mfi_ld_ref *) malloc(ld_size, M_MFIBUF, - M_WAITOK | M_ZERO); + M_NOWAIT | M_ZERO); if (ld_sync == NULL) { device_printf(sc->mfi_dev, "Failed to allocate sync\n"); goto out; } - for (i = 0; i < list->ld_count; i++) { + for (i = 0; i < list->ld_count; i++) ld_sync[i].ref = list->ld_list[i].ld.ref; - } - mtx_lock(&sc->mfi_io_lock); if ((cmd = mfi_dequeue_free(sc)) == NULL) { device_printf(sc->mfi_dev, "Failed to get 
command\n"); free(ld_sync, M_MFIBUF); @@ -1355,7 +1418,7 @@ mfi_tbolt_sync_map_info(struct mfi_softc *sc) device_printf(sc->mfi_dev, "failed to send map sync\n"); free(ld_sync, M_MFIBUF); sc->mfi_map_sync_cm = NULL; - mfi_requeue_ready(cmd); + mfi_release_command(cmd); goto out; } @@ -1364,7 +1427,6 @@ out: free(list, M_MFIBUF); if (cm) mfi_release_command(cm); - mtx_unlock(&sc->mfi_io_lock); } static void @@ -1389,14 +1451,13 @@ mfi_sync_map_complete(struct mfi_command *cm) } free(cm->cm_data, M_MFIBUF); - sc->mfi_map_sync_cm = NULL; wakeup(&sc->mfi_map_sync_cm); + sc->mfi_map_sync_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ - if (!aborted) { + if (!aborted) mfi_queue_map_sync(sc); - } } static void @@ -1412,5 +1473,7 @@ mfi_handle_map_sync(void *context, int pending) struct mfi_softc *sc; sc = context; + mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); + mtx_unlock(&sc->mfi_io_lock); } diff --git a/sys/dev/mfi/mfireg.h b/sys/dev/mfi/mfireg.h index dab9cf7..52ddafe 100644 --- a/sys/dev/mfi/mfireg.h +++ b/sys/dev/mfi/mfireg.h @@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$"); * ThunderBolt specific Register */ +#define MFI_RFPI 0x48 /* reply_free_post_host_index */ #define MFI_RPI 0x6c /* reply_post_host_index */ #define MFI_ILQP 0xc0 /* inbound_low_queue_port */ #define MFI_IHQP 0xc4 /* inbound_high_queue_port */ @@ -259,6 +260,13 @@ typedef enum { #define MFI_FRAME_DIR_READ 0x0010 #define MFI_FRAME_DIR_BOTH 0x0018 #define MFI_FRAME_IEEE_SGL 0x0020 +#define MFI_FRAME_FMT "\20" \ + "\1NOPOST" \ + "\2SGL64" \ + "\3SENSE64" \ + "\4WRITE" \ + "\5READ" \ + "\6IEEESGL" /* ThunderBolt Specific */ @@ -456,8 +464,8 @@ typedef enum { #define MFI_FRAME_SIZE 64 #define MFI_MBOX_SIZE 12 -/* Firmware flashing can take 40s */ -#define MFI_POLL_TIMEOUT_SECS 50 +/* Firmware flashing can take 50+ seconds */ +#define MFI_POLL_TIMEOUT_SECS 60 /* Allow for speedier math calculations */ #define MFI_SECTOR_LEN 512 diff --git 
a/sys/dev/mfi/mfivar.h b/sys/dev/mfi/mfivar.h index bb2a324..664ede9 100644 --- a/sys/dev/mfi/mfivar.h +++ b/sys/dev/mfi/mfivar.h @@ -102,12 +102,25 @@ struct mfi_command { #define MFI_CMD_DATAOUT (1<<2) #define MFI_CMD_COMPLETED (1<<3) #define MFI_CMD_POLLED (1<<4) -#define MFI_ON_MFIQ_FREE (1<<5) -#define MFI_ON_MFIQ_READY (1<<6) -#define MFI_ON_MFIQ_BUSY (1<<7) -#define MFI_ON_MFIQ_MASK ((1<<5)|(1<<6)|(1<<7)) -#define MFI_CMD_SCSI (1<<8) -#define MFI_CMD_CCB (1<<9) +#define MFI_CMD_SCSI (1<<5) +#define MFI_CMD_CCB (1<<6) +#define MFI_CMD_TBOLT (1<<7) +#define MFI_ON_MFIQ_FREE (1<<8) +#define MFI_ON_MFIQ_READY (1<<9) +#define MFI_ON_MFIQ_BUSY (1<<10) +#define MFI_ON_MFIQ_MASK (MFI_ON_MFIQ_FREE | MFI_ON_MFIQ_READY| \ + MFI_ON_MFIQ_BUSY) +#define MFI_CMD_FLAGS_FMT "\20" \ + "\1MAPPED" \ + "\2DATAIN" \ + "\3DATAOUT" \ + "\4COMPLETED" \ + "\5POLLED" \ + "\6SCSI" \ + "\7TBOLT" \ + "\10Q_FREE" \ + "\11Q_READY" \ + "\12Q_BUSY" uint8_t retry_for_fw_reset; void (* cm_complete)(struct mfi_command *cm); void *cm_private; @@ -268,10 +281,6 @@ struct mfi_softc { */ struct mfi_command *mfi_commands; /* - * How many commands were actually allocated - */ - int mfi_total_cmds; - /* * How many commands the firmware can handle. Also how big the reply * queue is, minus 1. 
*/ @@ -470,9 +479,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *); mfi_enqueue_ ## name (struct mfi_command *cm) \ { \ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \ - printf("command %p is on another queue, " \ + panic("command %p is on another queue, " \ "flags = %#x\n", cm, cm->cm_flags); \ - panic("command is on another queue"); \ } \ TAILQ_INSERT_TAIL(&cm->cm_sc->mfi_ ## name, cm, cm_link); \ cm->cm_flags |= MFI_ON_ ## index; \ @@ -482,9 +490,8 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *); mfi_requeue_ ## name (struct mfi_command *cm) \ { \ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { \ - printf("command %p is on another queue, " \ + panic("command %p is on another queue, " \ "flags = %#x\n", cm, cm->cm_flags); \ - panic("command is on another queue"); \ } \ TAILQ_INSERT_HEAD(&cm->cm_sc->mfi_ ## name, cm, cm_link); \ cm->cm_flags |= MFI_ON_ ## index; \ @@ -497,10 +504,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *); \ if ((cm = TAILQ_FIRST(&sc->mfi_ ## name)) != NULL) { \ if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \ - printf("command %p not in queue, " \ + panic("command %p not in queue, " \ "flags = %#x, bit = %#x\n", cm, \ cm->cm_flags, MFI_ON_ ## index); \ - panic("command not in queue"); \ } \ TAILQ_REMOVE(&sc->mfi_ ## name, cm, cm_link); \ cm->cm_flags &= ~MFI_ON_ ## index; \ @@ -512,10 +518,9 @@ extern int mfi_build_cdb(int, uint8_t, u_int64_t, u_int32_t, uint8_t *); mfi_remove_ ## name (struct mfi_command *cm) \ { \ if ((cm->cm_flags & MFI_ON_ ## index) == 0) { \ - printf("command %p not in queue, flags = %#x, " \ + panic("command %p not in queue, flags = %#x, " \ "bit = %#x\n", cm, cm->cm_flags, \ MFI_ON_ ## index); \ - panic("command not in queue"); \ } \ TAILQ_REMOVE(&cm->cm_sc->mfi_ ## name, cm, cm_link); \ cm->cm_flags &= ~MFI_ON_ ## index; \ @@ -608,7 +613,8 @@ SYSCTL_DECL(_hw_mfi); #ifdef MFI_DEBUG extern void mfi_print_cmd(struct mfi_command *cm); extern 
void mfi_dump_cmds(struct mfi_softc *sc); -extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char *, int ); +extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, + const char *, int); #define MFI_PRINT_CMD(cm) mfi_print_cmd(cm) #define MFI_DUMP_CMDS(sc) mfi_dump_cmds(sc) #define MFI_VALIDATE_CMD(sc, cm) mfi_validate_sg(sc, cm, __FUNCTION__, __LINE__) @@ -618,6 +624,8 @@ extern void mfi_validate_sg(struct mfi_softc *, struct mfi_command *, const char #define MFI_VALIDATE_CMD(sc, cm) #endif -extern void mfi_release_command(struct mfi_command *cm); +extern void mfi_release_command(struct mfi_command *); +extern void mfi_tbolt_return_cmd(struct mfi_softc *, + struct mfi_cmd_tbolt *, struct mfi_command *); #endif /* _MFIVAR_H */ diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c index d0ca808..664575c 100644 --- a/sys/dev/msk/if_msk.c +++ b/sys/dev/msk/if_msk.c @@ -1695,6 +1695,12 @@ msk_attach(device_t dev) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; + /* + * Disable RX checksum offloading on controllers that don't use + * new descriptor format but give chance to enable it. + */ + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) + ifp->if_capenable &= ~IFCAP_RXCSUM; /* * Tell the upper layer(s) we support long frames. diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 66da0d0..6d110ab 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1269,6 +1269,15 @@ brelse(struct buf *bp) KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); + if (BUF_LOCKRECURSED(bp)) { + /* + * Do not process, in particular, do not handle the + * B_INVAL/B_RELBUF and do not release to free list. 
+ */ + BUF_UNLOCK(bp); + return; + } + if (bp->b_flags & B_MANAGED) { bqrelse(bp); return; @@ -1445,12 +1454,6 @@ brelse(struct buf *bp) brelvp(bp); } - if (BUF_LOCKRECURSED(bp)) { - /* do not release to free list */ - BUF_UNLOCK(bp); - return; - } - /* enqueue */ mtx_lock(&bqlock); /* Handle delayed bremfree() processing. */ @@ -2682,6 +2685,9 @@ loop: /* We timed out or were interrupted. */ else if (error) return (NULL); + /* If recursed, assume caller knows the rules. */ + else if (BUF_LOCKRECURSED(bp)) + goto end; /* * The buffer is locked. B_CACHE is cleared if the buffer is @@ -2865,6 +2871,7 @@ loop: } CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); BUF_ASSERT_HELD(bp); +end: KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); return (bp); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b54dc04..0696edd 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -385,6 +385,7 @@ extern int vttoif_tab[]; #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ +#define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index abe4073..789a7cf 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1790,6 +1790,17 @@ fail: return (0); } +static inline struct buf * +getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags) +{ + struct fs *fs; + + fs = ip->i_fs; + return (getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, + cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0, + gbflags)); +} + /* * Determine whether an inode can be allocated. 
* @@ -1814,9 +1825,11 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) u_int8_t *inosused, *loc; struct ufs2_dinode *dp2; int error, start, len, i; + u_int32_t old_initediblk; fs = ip->i_fs; ump = ip->i_ump; +check_nifree: if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); UFS_UNLOCK(ump); @@ -1828,13 +1841,13 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) return (0); } cgp = (struct cg *)bp->b_data; +restart: if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { brelse(bp); UFS_LOCK(ump); return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; inosused = cg_inosused(cgp); if (ipref) { ipref %= fs->fs_ipg; @@ -1856,7 +1869,6 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) } } ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1; - cgp->cg_irotor = ipref; gotit: /* * Check to see if we need to initialize more inodes. @@ -1864,9 +1876,37 @@ gotit: if (fs->fs_magic == FS_UFS2_MAGIC && ipref + INOPB(fs) > cgp->cg_initediblk && cgp->cg_initediblk < cgp->cg_niblk) { - ibp = getblk(ip->i_devvp, fsbtodb(fs, - ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), - (int)fs->fs_bsize, 0, 0, 0); + old_initediblk = cgp->cg_initediblk; + + /* + * Free the cylinder group lock before writing the + * initialized inode block. Entering the + * babarrierwrite() with the cylinder group lock + * causes lock order violation between the lock and + * snaplk. + * + * Another thread can decide to initialize the same + * inode block, but whichever thread first gets the + * cylinder group lock after writing the newly + * allocated inode block will update it and the other + * will realize that it has lost and leave the + * cylinder group unchanged. + */ + ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT); + brelse(bp); + if (ibp == NULL) { + /* + * The inode block buffer is already owned by + * another thread, which must initialize it. + * Wait on the buffer to allow another thread + * to finish the updates, with dropped cg + * buffer lock, then retry. 
+ */ + ibp = getinobuf(ip, cg, old_initediblk, 0); + brelse(ibp); + UFS_LOCK(ump); + goto check_nifree; + } bzero(ibp->b_data, (int)fs->fs_bsize); dp2 = (struct ufs2_dinode *)(ibp->b_data); for (i = 0; i < INOPB(fs); i++) { @@ -1883,8 +1923,29 @@ gotit: * loading of newly created filesystems. */ babarrierwrite(ibp); - cgp->cg_initediblk += INOPB(fs); + + /* + * After the inode block is written, try to update the + * cg initediblk pointer. If another thread beat us + * to it, then leave it unchanged as the other thread + * has already set it correctly. + */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + UFS_LOCK(ump); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (error != 0) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (cgp->cg_initediblk == old_initediblk) + cgp->cg_initediblk += INOPB(fs); + goto restart; } + cgp->cg_old_time = cgp->cg_time = time_second; + cgp->cg_irotor = ipref; UFS_LOCK(ump); ACTIVECLEAR(fs, cg); setbit(inosused, ipref); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 16fe134..e39fd46 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1908,7 +1908,12 @@ softdep_flushfiles(oldmnt, flags, td) int flags; struct thread *td; { - int error, depcount, loopcnt, retry_flush_count, retry; +#ifdef QUOTA + struct ufsmount *ump; + int i; +#endif + int error, early, depcount, loopcnt, retry_flush_count, retry; + int morework; loopcnt = 10; retry_flush_count = 3; @@ -1926,7 +1931,9 @@ retry_flush: * Do another flush in case any vnodes were brought in * as part of the cleanup operations. */ - if ((error = ffs_flushfiles(oldmnt, flags, td)) != 0) + early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag & + MNTK_UNMOUNT) == 0 ? 
0 : EARLYFLUSH; + if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0) break; if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 || depcount == 0) @@ -1950,7 +1957,17 @@ retry_flush: MNT_ILOCK(oldmnt); KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0, ("softdep_flushfiles: !MNTK_NOINSMNTQ")); - if (oldmnt->mnt_nvnodelistsize > 0) { + morework = oldmnt->mnt_nvnodelistsize > 0; +#ifdef QUOTA + ump = VFSTOUFS(oldmnt); + UFS_LOCK(ump); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] != NULLVP) + morework = 1; + } + UFS_UNLOCK(ump); +#endif + if (morework) { if (--retry_flush_count > 0) { retry = 1; loopcnt = 3; diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 0204613..b3292d0 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1351,9 +1351,10 @@ ffs_flushfiles(mp, flags, td) struct thread *td; { struct ufsmount *ump; - int error; + int qerror, error; ump = VFSTOUFS(mp); + qerror = 0; #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; @@ -1361,11 +1362,19 @@ ffs_flushfiles(mp, flags, td) if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { - quotaoff(td, mp, i); + error = quotaoff(td, mp, i); + if (error != 0) { + if ((flags & EARLYFLUSH) == 0) + return (error); + else + qerror = error; + } } + /* - * Here we fall through to vflush again to ensure - * that we have gotten rid of all the system vnodes. + * Here we fall through to vflush again to ensure that + * we have gotten rid of all the system vnodes, unless + * quotas must not be closed. */ } #endif @@ -1380,11 +1389,21 @@ ffs_flushfiles(mp, flags, td) * that we have gotten rid of all the system vnodes. */ } - /* - * Flush all the files. + + /* + * Do not close system files if quotas were not closed, to be + * able to sync the remaining dquots. The freeblks softupdate + * workitems might hold a reference on a dquot, preventing + * quotaoff() from completing. 
Next round of + * softdep_flushworklist() iteration should process the + * blockers, allowing the next run of quotaoff() to finally + * flush held dquots. + * + * Otherwise, flush all the files. */ - if ((error = vflush(mp, 0, flags, td)) != 0) + if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) return (error); + /* * Flush filesystem metadata. */ diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 87ac9a1..a949898 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -80,7 +80,7 @@ static int dqopen(struct vnode *, struct ufsmount *, int); static int dqget(struct vnode *, u_long, struct ufsmount *, int, struct dquot **); static int dqsync(struct vnode *, struct dquot *); -static void dqflush(struct vnode *); +static int dqflush(struct vnode *); static int quotaoff1(struct thread *td, struct mount *mp, int type); static int quotaoff_inchange(struct thread *td, struct mount *mp, int type); @@ -674,8 +674,12 @@ again: vrele(vp); } - dqflush(qvp); - /* Clear um_quotas before closing the quota vnode to prevent + error = dqflush(qvp); + if (error != 0) + return (error); + + /* + * Clear um_quotas before closing the quota vnode to prevent * access to the closed vnode from dqget/dqsync */ UFS_LOCK(ump); @@ -1594,17 +1598,19 @@ out: /* * Flush all entries from the cache for a particular vnode. */ -static void +static int dqflush(struct vnode *vp) { struct dquot *dq, *nextdq; struct dqhash *dqh; + int error; /* * Move all dquot's that used to refer to this quota * file off their hash chains (they will eventually * fall off the head of the free list and be re-used). 
*/ + error = 0; DQH_LOCK(); for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { for (dq = LIST_FIRST(dqh); dq; dq = nextdq) { @@ -1612,12 +1618,15 @@ dqflush(struct vnode *vp) if (dq->dq_ump->um_quotas[dq->dq_type] != vp) continue; if (dq->dq_cnt) - panic("dqflush: stray dquot"); - LIST_REMOVE(dq, dq_hash); - dq->dq_ump = (struct ufsmount *)0; + error = EBUSY; + else { + LIST_REMOVE(dq, dq_hash); + dq->dq_ump = NULL; + } } } DQH_UNLOCK(); + return (error); } /* |