diff options
Diffstat (limited to 'cddl/contrib/opensolaris/cmd/zdb/zdb.c')
-rw-r--r-- | cddl/contrib/opensolaris/cmd/zdb/zdb.c | 213 |
1 files changed, 171 insertions, 42 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c index 61e8071..04970fc 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. */ #include <stdio.h> @@ -75,9 +75,9 @@ DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES)) #ifndef lint -extern int zfs_recover; +extern boolean_t zfs_recover; #else -int zfs_recover; +boolean_t zfs_recover; #endif const char cmdname[] = "zdb"; @@ -111,11 +111,11 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-M inflight I/Os] poolname [object...]\n" + "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n" " %s [-divPA] [-e -p path...] [-U config] dataset " "[object...]\n" - " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" @@ -138,6 +138,7 @@ usage(void) (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -M metaslab groups\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); @@ -150,7 +151,7 @@ usage(void) (void) fprintf(stderr, " -R read and display block from a " "device\n\n"); (void) fprintf(stderr, " Below options are intended for use " - "with other options (except -l):\n"); + "with other options:\n"); (void) fprintf(stderr, " -A ignore assertions (-A), enable " "panic recovery (-AA) or both (-AAA)\n"); (void) fprintf(stderr, " -F attempt automatic rewind within " @@ -163,11 +164,14 @@ usage(void) "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p <path> -- use one or more with " "-e to specify path to vdev dir\n"); - (void) fprintf(stderr, " -P print numbers in parseable form\n"); + (void) fprintf(stderr, " -x <dumpdir> -- " + "dump all read blocks into specified directory\n"); + (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t <txg> -- highest txg to use when " "searching for uberblocks\n"); - (void) fprintf(stderr, " -M <number of inflight I/Os> -- " - "specify the maximum number of checksumming I/Os [default is 200]"); + (void) fprintf(stderr, " -I <number of inflight I/Os> -- " + "specify the maximum number of " + "checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); @@ -545,7 +549,7 @@ get_metaslab_refcount(vdev_t *vd) { int refcount = 0; - if (vd->vdev_top == vd) { + if (vd->vdev_top == vd && !vd->vdev_removing) { for (int m = 0; m < vd->vdev_ms_count; m++) { space_map_t *sm = vd->vdev_ms[m]->ms_sm; @@ -683,9 +687,10 @@ dump_metaslab(metaslab_t *msp) * The space map histogram represents free space in chunks * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). */ - (void) printf("\tOn-disk histogram:\n"); + (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", + (u_longlong_t)msp->ms_fragmentation); dump_histogram(sm->sm_phys->smp_histogram, - SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift); + SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { @@ -710,6 +715,47 @@ print_vdev_metaslab_header(vdev_t *vd) } static void +dump_metaslab_groups(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_class_t *mc = spa_normal_class(spa); + uint64_t fragmentation; + + metaslab_class_histogram_verify(mc); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (mg->mg_class != mc) + continue; + + metaslab_group_histogram_verify(mg); + mg->mg_fragmentation = metaslab_group_fragmentation(mg); + + (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" + "fragmentation", + (u_longlong_t)tvd->vdev_id, + (u_longlong_t)tvd->vdev_ms_count); + if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { + (void) printf("%3s\n", "-"); + } else { + (void) printf("%3llu%%\n", + (u_longlong_t)mg->mg_fragmentation); + } + dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + } + + (void) printf("\tpool %s\tfragmentation", spa_name(spa)); + fragmentation = metaslab_class_fragmentation(mc); + if (fragmentation == ZFS_FRAG_INVALID) + (void) printf("\t%3s\n", "-"); + else + (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); + dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); +} + +static void dump_metaslabs(spa_t *spa) { vdev_t *vd, *rvd = spa->spa_root_vdev; @@ -1032,7 +1078,8 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) } static uint64_t -blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) +blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, + const zbookmark_phys_t *zb) { if (dnp == NULL) { ASSERT(zb->zb_level < 0); @@ -1059,8 +1106,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) return; } - blkbuf[0] = '\0'; + if (BP_IS_EMBEDDED(bp)) { + (void) sprintf(blkbuf, + "EMBEDDED et=%u %llxL/%llxP B=%llu", + (int)BPE_GET_ETYPE(bp), + (u_longlong_t)BPE_GET_LSIZE(bp), + (u_longlong_t)BPE_GET_PSIZE(bp), + (u_longlong_t)bp->blk_birth); + return; + } + blkbuf[0] = '\0'; for (int i = 0; i < ndvas; i++) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), "%llu:%llx:%llx ", @@ -1078,21 +1134,23 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) "%llxL/%llxP F=%llu B=%llu/%llu", (u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); } } static void -print_indirect(blkptr_t *bp, const zbookmark_t *zb, +print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; int l; - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + if (!BP_IS_EMBEDDED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); @@ -1112,7 +1170,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb, static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, - blkptr_t *bp, const zbookmark_t *zb) + blkptr_t *bp, const zbookmark_phys_t *zb) { int err = 0; @@ -1138,7 +1196,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, /* recursively visit blocks below this */ cbp = buf->b_data; for (i = 0; i < epb; i++, cbp++) { - zbookmark_t czb; + zbookmark_phys_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, @@ -1146,10 +1204,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; - fill += cbp->blk_fill; + fill += BP_GET_FILL(cbp); } if (!err) - ASSERT3U(fill, ==, bp->blk_fill); + ASSERT3U(fill, ==, BP_GET_FILL(bp)); (void) arc_buf_remove_ref(buf, &buf); } @@ -1162,7 +1220,7 @@ dump_indirect(dnode_t *dn) { dnode_phys_t *dnp = dn->dn_phys; int j; - zbookmark_t czb; + zbookmark_phys_t czb; (void) printf("Indirect blocks:\n"); @@ -1816,14 +1874,14 @@ dump_dir(objset_t *os) if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; - usedobjs = os->os_rootbp->blk_fill; + usedobjs = BP_GET_FILL(os->os_rootbp); refdbytes = os->os_spa->spa_dsl_pool-> dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } - ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); + ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); zdb_nicenum(refdbytes, numbuf); @@ -2108,6 +2166,8 @@ typedef struct zdb_blkstats { uint64_t zb_lsize; uint64_t zb_psize; uint64_t zb_count; + uint64_t zb_gangs; + uint64_t zb_ditto_samevdev; uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; } zdb_blkstats_t; @@ -2132,6 +2192,9 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; + uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] + [BPE_PAYLOAD_SIZE]; uint64_t zcb_start; uint64_t zcb_lastprint; uint64_t zcb_totalasize; @@ -2155,6 +2218,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, for (int i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? type : ZDB_OT_TOTAL; + int equal; zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; zb->zb_asize += BP_GET_ASIZE(bp); @@ -2162,6 +2226,34 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++; + + zb->zb_gangs += BP_COUNT_GANG(bp); + + switch (BP_GET_NDVAS(bp)) { + case 2: + if (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + zb->zb_ditto_samevdev++; + break; + case 3: + equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + + (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[2])) + + (DVA_GET_VDEV(&bp->blk_dva[1]) == + DVA_GET_VDEV(&bp->blk_dva[2])); + if (equal != 0) + zb->zb_ditto_samevdev++; + break; + } + + } + + if (BP_IS_EMBEDDED(bp)) { + zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; + zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] + [BPE_GET_PSIZE(bp)]++; + return; } if (dump_opt['L']) @@ -2200,7 +2292,7 @@ zdb_blkptr_done(zio_t *zio) blkptr_t *bp = zio->io_bp; int ioerr = zio->io_error; zdb_cb_t *zcb = zio->io_private; - zbookmark_t *zb = &zio->io_bookmark; + zbookmark_phys_t *zb = &zio->io_bookmark; zio_data_buf_free(zio->io_data, zio->io_size); @@ -2235,7 +2327,7 @@ zdb_blkptr_done(zio_t *zio) /* ARGSUSED */ static int zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; dmu_object_type_t type; @@ -2263,7 +2355,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); - if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { + if (!BP_IS_EMBEDDED(bp) && + (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; @@ -2319,8 +2412,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size) } static metaslab_ops_t zdb_metaslab_ops = { - NULL, /* alloc */ - NULL /* fragmented */ + NULL /* alloc */ }; static void @@ -2455,7 +2547,7 @@ dump_block_stats(spa_t *spa) zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; - int leaks = 0; + boolean_t leaks = B_FALSE; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", @@ -2543,7 +2635,7 @@ dump_block_stats(spa_t *spa) (u_longlong_t)total_alloc, (dump_opt['L']) ? "unreachable" : "leaked", (longlong_t)(total_alloc - total_found)); - leaks = 1; + leaks = B_TRUE; } if (tzb->zb_count == 0) @@ -2552,6 +2644,8 @@ dump_block_stats(spa_t *spa) (void) printf("\n"); (void) printf("\tbp count: %10llu\n", (u_longlong_t)tzb->zb_count); + (void) printf("\tganged count: %10llu\n", + (longlong_t)tzb->zb_gangs); (void) printf("\tbp logical: %10llu avg: %6llu\n", (u_longlong_t)tzb->zb_lsize, (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); @@ -2573,6 +2667,28 @@ dump_block_stats(spa_t *spa) (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); + for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { + if (zcb.zcb_embedded_blocks[i] == 0) + continue; + (void) printf("\n"); + (void) printf("\tadditional, non-pointer bps of type %u: " + "%10llu\n", + i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); + + if (dump_opt['b'] >= 3) { + (void) printf("\t number of (compressed) bytes: " + "number of bps\n"); + dump_histogram(zcb.zcb_embedded_histogram[i], + sizeof (zcb.zcb_embedded_histogram[i]) / + sizeof (zcb.zcb_embedded_histogram[i][0]), 0); + } + } + + if (tzb->zb_ditto_samevdev != 0) { + (void) printf("\tDittoed blocks on same vdev: %llu\n", + (longlong_t)tzb->zb_ditto_samevdev); + } + if (dump_opt['b'] >= 2) { int l, t, level; (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" @@ -2580,7 +2696,7 @@ dump_block_stats(spa_t *spa) for (t = 0; t <= ZDB_OT_TOTAL; t++) { char csize[32], lsize[32], psize[32], asize[32]; - char avg[32]; + char avg[32], gang[32]; char *typename; if (t < DMU_OT_NUMTYPES) @@ -2621,6 +2737,7 @@ dump_block_stats(spa_t *spa) zdb_nicenum(zb->zb_psize, psize); zdb_nicenum(zb->zb_asize, asize); zdb_nicenum(zb->zb_asize / zb->zb_count, avg); + zdb_nicenum(zb->zb_gangs, gang); (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" "\t%5.2f\t%6.2f\t", @@ -2634,6 +2751,11 @@ dump_block_stats(spa_t *spa) (void) printf(" L%d %s\n", level, typename); + if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { + (void) printf("\t number of ganged " + "blocks: %s\n", gang); + } + if (dump_opt['b'] >= 4) { (void) printf("psize " "(in 512-byte sectors): " @@ -2668,20 +2790,20 @@ typedef struct zdb_ddt_entry { /* ARGSUSED */ static int zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { avl_tree_t *t = arg; avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (BP_IS_HOLE(bp)) + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", (u_longlong_t)zb->zb_objset, - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), avl_numnodes(t)); } @@ -2785,6 +2907,8 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); + if (dump_opt['M']) + dump_metaslab_groups(spa); if (dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); @@ -3279,7 +3403,8 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) { + while ((c = getopt(argc, argv, + "bcdhilmMI:suCDRSAFLXx:evp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': @@ -3292,6 +3417,7 @@ main(int argc, char **argv) case 'u': case 'C': case 'D': + case 'M': case 'R': case 'S': dump_opt[c]++; @@ -3305,10 +3431,7 @@ main(int argc, char **argv) case 'P': dump_opt[c]++; break; - case 'v': - verbose++; - break; - case 'M': + case 'I': max_inflight = strtoull(optarg, NULL, 0); if (max_inflight == 0) { (void) fprintf(stderr, "maximum number " @@ -3343,6 +3466,12 @@ main(int argc, char **argv) case 'U': spa_config_path = optarg; break; + case 'v': + verbose++; + break; + case 'x': + vn_dumpdir = optarg; + break; default: usage(); break; |