From ede49d4106ad9e6d34a2592322e48604f72605bd Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 19 Mar 2014 23:55:03 +0000 Subject: MFC r260150: MFV r259170: 4370 avoid transmitting holes during zfs send 4371 DMU code clean up illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f NOTE: Make sure the boot code is updated if a zpool upgrade is done on boot zpool. --- cddl/contrib/opensolaris/cmd/zdb/zdb.c | 83 ++++++++++++---------- cddl/contrib/opensolaris/cmd/zdb/zdb_il.c | 14 ++-- cddl/contrib/opensolaris/cmd/zhack/zhack.c | 14 ++-- .../contrib/opensolaris/cmd/zpool/zpool-features.7 | 72 ++++++++++++++++++- 4 files changed, 136 insertions(+), 47 deletions(-) (limited to 'cddl') diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c index 9294cd6..932687a 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c @@ -764,7 +764,7 @@ dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); - sprintf_blkptr(blkbuf, &blk); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); (void) printf("index %llx refcnt %llu %s %s\n", (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, types[p], blkbuf); @@ -1024,31 +1024,39 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) } static void -sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) +snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; int ndvas = dump_opt['d'] > 5 ? 
BP_GET_NDVAS(bp) : 1; if (dump_opt['b'] >= 6) { - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, buflen, bp); return; } blkbuf[0] = '\0'; for (int i = 0; i < ndvas; i++) - (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ", + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), (u_longlong_t)DVA_GET_ASIZE(&dva[i])); - (void) sprintf(blkbuf + strlen(blkbuf), - "%llxL/%llxP F=%llu B=%llu/%llu", - (u_longlong_t)BP_GET_LSIZE(bp), - (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, - (u_longlong_t)bp->blk_birth, - (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + if (BP_IS_HOLE(bp)) { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), "B=%llu", + (u_longlong_t)bp->blk_birth); + } else { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), + "%llxL/%llxP F=%llu B=%llu/%llu", + (u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)BP_GET_PSIZE(bp), + (u_longlong_t)bp->blk_fill, + (u_longlong_t)bp->blk_birth, + (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + } } static void @@ -1073,7 +1081,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb, } } - sprintf_blkptr_compact(blkbuf, bp); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } @@ -1088,7 +1096,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, print_indirect(bp, zb, dnp); - if (BP_GET_LEVEL(bp) > 0) { + if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { uint32_t flags = ARC_WAIT; int i; blkptr_t *cbp; @@ -1213,7 +1221,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) zdb_nicenum(ds->ds_compressed_bytes, compressed); zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed); zdb_nicenum(ds->ds_unique_bytes, unique); - sprintf_blkptr(blkbuf, &ds->ds_bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); (void) printf("\t\tdir_obj = %llu\n", (u_longlong_t)ds->ds_dir_obj); @@ -1258,7 +1266,7 @@ 
dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) char blkbuf[BP_SPRINTF_LEN]; if (bp->blk_birth != 0) { - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); } return (0); @@ -1296,7 +1304,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) char blkbuf[BP_SPRINTF_LEN]; ASSERT(bp->blk_birth != 0); - sprintf_blkptr_compact(blkbuf, bp); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); return (0); } @@ -1795,8 +1803,9 @@ dump_dir(objset_t *os) zdb_nicenum(refdbytes, numbuf); if (verbosity >= 4) { - (void) sprintf(blkbuf, ", rootbp "); - (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp); + (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); + (void) snprintf_blkptr(blkbuf + strlen(blkbuf), + sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); } else { blkbuf[0] = '\0'; } @@ -1826,7 +1835,7 @@ dump_dir(objset_t *os) if (verbosity < 2) return; - if (os->os_rootbp->blk_birth == 0) + if (BP_IS_HOLE(os->os_rootbp)) return; dump_object(os, 0, verbosity, &print_header); @@ -1867,7 +1876,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer) (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, &ub->ub_rootbp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); (void) printf("\trootbp = %s\n", blkbuf); } (void) printf(footer ? 
footer : ""); @@ -2181,7 +2190,7 @@ zdb_blkptr_done(zio_t *zio) zcb->zcb_errors[ioerr]++; if (dump_opt['b'] >= 2) - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); else blkbuf[0] = '\0'; @@ -2204,11 +2213,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; - char blkbuf[BP_SPRINTF_LEN]; dmu_object_type_t type; boolean_t is_metadata; - if (bp == NULL) + if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("objset %llu object %llu " + "level %lld offset 0x%llx %s\n", + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (u_longlong_t)blkid2offset(dnp, bp, zb), + blkbuf); + } + + if (BP_IS_HOLE(bp)) return (0); type = BP_GET_TYPE(bp); @@ -2239,17 +2259,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zcb->zcb_readfails = 0; - if (dump_opt['b'] >= 5) { - sprintf_blkptr(blkbuf, bp); - (void) printf("objset %llu object %llu " - "level %lld offset 0x%llx %s\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (u_longlong_t)blkid2offset(dnp, bp, zb), - blkbuf); - } - if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) && gethrtime() > zcb->zcb_lastprint + NANOSEC) { uint64_t now = gethrtime(); @@ -2406,7 +2415,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (dump_opt['b'] >= 5) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("[%s] %s\n", "deferred free", blkbuf); } @@ -2640,7 +2649,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (bp == NULL) + if (BP_IS_HOLE(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { @@ -2807,7 +2816,7 @@ zdb_print_blkptr(blkptr_t *bp, int flags) if 
(flags & ZDB_FLAG_BSWAP) byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c index a0ed985..17f7ad3 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c @@ -24,6 +24,10 @@ */ /* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +/* * Print intent log header and statistics. */ @@ -47,7 +51,7 @@ print_log_bp(const blkptr_t *bp, const char *prefix) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s%s\n", prefix, blkbuf); } @@ -132,6 +136,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", prefix, + !BP_IS_HOLE(bp) && bp->blk_birth >= spa_first_txg(zilog->zl_spa) ? 
"will claim" : "won't claim"); print_log_bp(bp, prefix); @@ -139,8 +144,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); - } - if (bp->blk_birth == 0) { bzero(buf, sizeof (buf)); (void) printf("%s\n", prefix); return; @@ -313,7 +316,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) if (verbose >= 5) { (void) strcpy(blkbuf, ", "); - sprintf_blkptr(blkbuf + strlen(blkbuf), bp); + snprintf_blkptr(blkbuf + strlen(blkbuf), + sizeof (blkbuf) - strlen(blkbuf), bp); } else { blkbuf[0] = '\0'; } @@ -361,7 +365,7 @@ dump_intent_log(zilog_t *zilog) int verbose = MAX(dump_opt['d'], dump_opt['i']); int i; - if (zh->zh_log.blk_birth == 0 || verbose < 1) + if (BP_IS_HOLE(&zh->zh_log) || verbose < 1) return; (void) printf("\n ZIL header: claim_txg %llu, " diff --git a/cddl/contrib/opensolaris/cmd/zhack/zhack.c b/cddl/contrib/opensolaris/cmd/zhack/zhack.c index 9b80fcc..ace8c32 100644 --- a/cddl/contrib/opensolaris/cmd/zhack/zhack.c +++ b/cddl/contrib/opensolaris/cmd/zhack/zhack.c @@ -277,6 +277,9 @@ zhack_do_feature_stat(int argc, char **argv) dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); + if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { + dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg"); + } dump_mos(spa); spa_close(spa, FTAG); @@ -313,7 +316,9 @@ zhack_do_feature_enable(int argc, char **argv) feature.fi_uname = "zhack"; feature.fi_mos = B_FALSE; feature.fi_can_readonly = B_FALSE; + feature.fi_activate_on_enable = B_FALSE; feature.fi_depends = nodeps; + feature.fi_feature = SPA_FEATURE_NONE; optind = 1; while ((c = getopt(argc, argv, "rmd:")) != -1) { @@ -371,7 +376,7 @@ feature_incr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; - VERIFY0(feature_get_refcount(spa, 
feature, &refcount)); + VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount + 1, tx); spa_history_log_internal(spa, "zhack feature incr", tx, "name=%s", feature->fi_guid); @@ -384,7 +389,7 @@ feature_decr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; - VERIFY0(feature_get_refcount(spa, feature, &refcount)); + VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount - 1, tx); spa_history_log_internal(spa, "zhack feature decr", tx, "name=%s", feature->fi_guid); @@ -411,6 +416,7 @@ zhack_do_feature_ref(int argc, char **argv) feature.fi_mos = B_FALSE; feature.fi_desc = NULL; feature.fi_depends = nodeps; + feature.fi_feature = SPA_FEATURE_NONE; optind = 1; while ((c = getopt(argc, argv, "md")) != -1) { @@ -459,8 +465,8 @@ zhack_do_feature_ref(int argc, char **argv) if (decr) { uint64_t count; - if (feature_get_refcount(spa, &feature, &count) == 0 && - count != 0) { + if (feature_get_refcount_from_disk(spa, &feature, + &count) == 0 && count != 0) { fatal(spa, FTAG, "feature refcount already 0: %s", feature.fi_guid); } diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 b/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 index d807257..43eede5 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 08, 2013 +.Dd December 31, 2013 .Dt ZPOOL-FEATURES 7 .Os .Sh NAME @@ -286,6 +286,76 @@ and will be returned to the .Sy enabled state when all datasets that use this feature are destroyed. +.It Sy enabled_txg +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:enabled_txg" +.It GUID Ta com.delphix:enabled_txg +.It READ\-ONLY COMPATIBLE Ta yes +.It DEPENDENCIES Ta none +.El +.Pp +Once this feature is enabled ZFS records the transaction group number +in which new features are enabled. 
This has no user-visible impact, +but other features may depend on this feature. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . +.It Sy hole_birth +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:hole_birth" +.It GUID Ta com.delphix:hole_birth +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta enabled_txg +.El +.Pp +This feature improves performance of incremental sends +.Pq Dq zfs send -i +and receives for objects with many holes. +The most common case of +hole-filled objects is zvols. +.Pp +An incremental send stream from snapshot +.Sy A +to snapshot +.Sy B +contains information about every block that changed between +.Sy A +and +.Sy B . +Blocks which did not change between those snapshots can be +identified and omitted from the stream using a piece of metadata called +the 'block birth time', but birth times are not recorded for holes +.Pq blocks filled only with zeroes . +Since holes created after +.Sy A +cannot be +distinguished from holes created before +.Sy A , +information about every +hole in the entire filesystem or zvol is included in the send stream. +.Pp +For workloads where holes are rare this is not a problem. +However, when +incrementally replicating filesystems or zvols with many holes +.Pq for example a zvol formatted with another filesystem +a lot of time will +be spent sending and receiving unnecessary information about holes that +already exist on the receiving side. +.Pp +Once the +.Sy hole_birth +feature has been enabled the block birth times +of all new holes will be recorded. +Incremental sends between snapshots +created after this feature is enabled will use this new metadata to avoid +sending information about holes that already exist on the receiving side. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . .El .Sh SEE ALSO .Xr zpool 8 -- cgit v1.1