diff options
author | mav <mav@FreeBSD.org> | 2017-07-26 16:14:05 +0000 |
---|---|---|
committer | mav <mav@FreeBSD.org> | 2017-07-26 16:14:05 +0000 |
commit | f0882293abab53b447ecd49ce97c8200e69c03b2 (patch) | |
tree | 0f72defadd9e9b2b270bf7d7c945f67d125d59fb /cddl | |
parent | 47b82b1986579f97f82435ad6ee165c4a380cad4 (diff) | |
download | FreeBSD-src-f0882293abab53b447ecd49ce97c8200e69c03b2.zip FreeBSD-src-f0882293abab53b447ecd49ce97c8200e69c03b2.tar.gz |
MFC r315896: MFV r315290, r315291: 7303 dynamic metaslab selection
illumos/illumos-gate@8363e80ae72609660f6090766ca8c2c18aa53f0c
https://github.com/illumos/illumos-gate/commit/8363e80ae72609660f6090766ca8c2c18
https://www.illumos.org/issues/7303
This change introduces a new weighting algorithm to improve metaslab selection
.
The new weighting algorithm relies on the SPACEMAP_HISTOGRAM feature. As a res
ult,
the metaslab weight now encodes the type of weighting algorithm used
(size-based vs segment-based).
This also introduce a new allocation tracing facility and two new dcmds to hel
p
debug allocation problems. Each zio now contains a zio_alloc_list_t structure
that is populated as the zio goes through the allocations stage. Here's an
example of how to use the tracing facility:
> c5ec000::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
MSID DVA ASIZE WEIGHT RESULT VDEV
- 0 400 0 NOT_ALLOCATABLE ztest.0a
- 0 400 0 NOT_ALLOCATABLE ztest.0a
- 0 400 0 ENOSPC ztest.0a
- 0 200 0 NOT_ALLOCATABLE ztest.0a
- 0 200 0 NOT_ALLOCATABLE ztest.0a
- 0 200 0 ENOSPC ztest.0a
1 0 400 1 x 8M 17b1a00 ztest.0a
> 1ff2400::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
MSID DVA ASIZE WEIGHT RESULT VDEV
- 0 200 0 NOT_ALLOCATABLE mirror-2
- 0 200 0 NOT_ALLOCATABLE mirror-0
1 0 200 1 x 4M 112ae00 mirror-1
- 1 200 0 NOT_ALLOCATABLE mirror-2
- 1 200 0 NOT_ALLOCATABLE mirror-0
1 1 200 1 x 4M 112b000 mirror-1
- 2 200 0 NOT_ALLOCATABLE mirror-2
If the metaslab is using segment-based weighting then the WEIGHT column will
display the number of segments available in the bucket where the allocation
attempt was made.
Author: George Wilson <george.wilson@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
Diffstat (limited to 'cddl')
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zdb/zdb.c | 25 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/cmd/ztest/ztest.c | 2 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/lib/libzpool/common/kernel.c | 5 |
3 files changed, 27 insertions, 5 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c index 482e560..743641b 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c @@ -2585,10 +2585,21 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; + + /* + * We are going to be changing the meaning of the metaslab's + * ms_tree. Ensure that the allocator doesn't try to + * use the tree. + */ + spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; + spa->spa_log_class->mc_ops = &zdb_metaslab_ops; + for (uint64_t c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; + metaslab_group_t *mg = vd->vdev_mg; for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; + ASSERT3P(msp->ms_group, ==, mg); mutex_enter(&msp->ms_lock); metaslab_unload(msp); @@ -2609,8 +2620,6 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) (longlong_t)m, (longlong_t)vd->vdev_ms_count); - msp->ms_ops = &zdb_metaslab_ops; - /* * We don't want to spend the CPU * manipulating the size-ordered @@ -2620,7 +2629,10 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) msp->ms_tree->rt_ops = NULL; VERIFY0(space_map_load(msp->ms_sm, msp->ms_tree, SM_ALLOC)); - msp->ms_loaded = B_TRUE; + + if (!msp->ms_loaded) { + msp->ms_loaded = B_TRUE; + } } mutex_exit(&msp->ms_lock); } @@ -2642,8 +2654,10 @@ zdb_leak_fini(spa_t *spa) vdev_t *rvd = spa->spa_root_vdev; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; + metaslab_group_t *mg = vd->vdev_mg; for (int m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; + ASSERT3P(mg, ==, msp->ms_group); mutex_enter(&msp->ms_lock); /* @@ -2657,7 +2671,10 @@ zdb_leak_fini(spa_t *spa) * from the ms_tree. */ range_tree_vacate(msp->ms_tree, zdb_leak, vd); - msp->ms_loaded = B_FALSE; + + if (msp->ms_loaded) { + msp->ms_loaded = B_FALSE; + } mutex_exit(&msp->ms_lock); } diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c index bfc80e3..491bd6b 100644 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c @@ -173,7 +173,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { .zo_mirrors = 2, .zo_raidz = 4, .zo_raidz_parity = 1, - .zo_vdev_size = SPA_MINDEVSIZE * 2, + .zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */ .zo_datasets = 7, .zo_threads = 23, .zo_passtime = 60, /* 60 seconds */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c index a7fc128..701f0b4 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c +++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c @@ -97,6 +97,11 @@ kstat_create(char *module, int instance, char *name, char *class, /*ARGSUSED*/ void +kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type) +{} + +/*ARGSUSED*/ +void kstat_install(kstat_t *ksp) {} |