diff options
author | avg <avg@FreeBSD.org> | 2013-12-10 13:33:56 +0000 |
---|---|---|
committer | avg <avg@FreeBSD.org> | 2013-12-10 13:33:56 +0000 |
commit | 888a2df901616fb2900279c75580de3d4bc93278 (patch) | |
tree | 86a07fe33f3e290f9ced3b3f76f78d8cc704d4fd | |
parent | 58b9ad4f2e737ff922ff49ae564a32ee94c0ff6b (diff) | |
download | FreeBSD-src-888a2df901616fb2900279c75580de3d4bc93278.zip FreeBSD-src-888a2df901616fb2900279c75580de3d4bc93278.tar.gz |
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f
-rw-r--r-- | cmd/zdb/zdb.c | 83 | ||||
-rw-r--r-- | cmd/zdb/zdb_il.c | 14 | ||||
-rw-r--r-- | cmd/zhack/zhack.c | 14 | ||||
-rw-r--r-- | man/man5/zpool-features.5 | 65 |
4 files changed, 130 insertions, 46 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 1723342..a61cae3 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -764,7 +764,7 @@ dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); - sprintf_blkptr(blkbuf, &blk); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); (void) printf("index %llx refcnt %llu %s %s\n", (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, types[p], blkbuf); @@ -1022,31 +1022,39 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) } static void -sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) +snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; if (dump_opt['b'] >= 6) { - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, buflen, bp); return; } blkbuf[0] = '\0'; for (int i = 0; i < ndvas; i++) - (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ", + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), (u_longlong_t)DVA_GET_ASIZE(&dva[i])); - (void) sprintf(blkbuf + strlen(blkbuf), - "%llxL/%llxP F=%llu B=%llu/%llu", - (u_longlong_t)BP_GET_LSIZE(bp), - (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, - (u_longlong_t)bp->blk_birth, - (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + if (BP_IS_HOLE(bp)) { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), "B=%llu", + (u_longlong_t)bp->blk_birth); + } else { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), + "%llxL/%llxP F=%llu B=%llu/%llu", + (u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)BP_GET_PSIZE(bp), + (u_longlong_t)bp->blk_fill, + (u_longlong_t)bp->blk_birth, + (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + } } static void @@ -1071,7 +1079,7 @@ print_indirect(blkptr_t *bp, const 
zbookmark_t *zb, } } - sprintf_blkptr_compact(blkbuf, bp); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } @@ -1086,7 +1094,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, print_indirect(bp, zb, dnp); - if (BP_GET_LEVEL(bp) > 0) { + if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { uint32_t flags = ARC_WAIT; int i; blkptr_t *cbp; @@ -1211,7 +1219,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) zdb_nicenum(ds->ds_compressed_bytes, compressed); zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed); zdb_nicenum(ds->ds_unique_bytes, unique); - sprintf_blkptr(blkbuf, &ds->ds_bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); (void) printf("\t\tdir_obj = %llu\n", (u_longlong_t)ds->ds_dir_obj); @@ -1256,7 +1264,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) char blkbuf[BP_SPRINTF_LEN]; if (bp->blk_birth != 0) { - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); } return (0); @@ -1294,7 +1302,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) char blkbuf[BP_SPRINTF_LEN]; ASSERT(bp->blk_birth != 0); - sprintf_blkptr_compact(blkbuf, bp); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); (void) printf("\t%s\n", blkbuf); return (0); } @@ -1793,8 +1801,9 @@ dump_dir(objset_t *os) zdb_nicenum(refdbytes, numbuf); if (verbosity >= 4) { - (void) sprintf(blkbuf, ", rootbp "); - (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp); + (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); + (void) snprintf_blkptr(blkbuf + strlen(blkbuf), + sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); } else { blkbuf[0] = '\0'; } @@ -1824,7 +1833,7 @@ dump_dir(objset_t *os) if (verbosity < 2) return; - if (os->os_rootbp->blk_birth == 0) + if (BP_IS_HOLE(os->os_rootbp)) return; dump_object(os, 0, verbosity, &print_header); @@ -1865,7 +1874,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char 
*footer) (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, &ub->ub_rootbp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); (void) printf("\trootbp = %s\n", blkbuf); } (void) printf(footer ? footer : ""); @@ -2202,7 +2211,7 @@ zdb_blkptr_done(zio_t *zio) zcb->zcb_errors[ioerr]++; if (dump_opt['b'] >= 2) - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); else blkbuf[0] = '\0'; @@ -2224,11 +2233,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; - char blkbuf[BP_SPRINTF_LEN]; dmu_object_type_t type; boolean_t is_metadata; - if (bp == NULL) + if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("objset %llu object %llu " + "level %lld offset 0x%llx %s\n", + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (u_longlong_t)blkid2offset(dnp, bp, zb), + blkbuf); + } + + if (BP_IS_HOLE(bp)) return (0); type = BP_GET_TYPE(bp); @@ -2259,17 +2279,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zcb->zcb_readfails = 0; - if (dump_opt['b'] >= 5) { - sprintf_blkptr(blkbuf, bp); - (void) printf("objset %llu object %llu " - "level %lld offset 0x%llx %s\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (u_longlong_t)blkid2offset(dnp, bp, zb), - blkbuf); - } - if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) && gethrtime() > zcb->zcb_lastprint + NANOSEC) { uint64_t now = gethrtime(); @@ -2426,7 +2435,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (dump_opt['b'] >= 5) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("[%s] %s\n", "deferred free", blkbuf); } @@ -2673,7 +2682,7 @@ 
zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (bp == NULL) + if (BP_IS_HOLE(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { @@ -2840,7 +2849,7 @@ zdb_print_blkptr(blkptr_t *bp, int flags) if (flags & ZDB_FLAG_BSWAP) byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index a0ed985..17f7ad3 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -24,6 +24,10 @@ */ /* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +/* * Print intent log header and statistics. */ @@ -47,7 +51,7 @@ print_log_bp(const blkptr_t *bp, const char *prefix) { char blkbuf[BP_SPRINTF_LEN]; - sprintf_blkptr(blkbuf, bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s%s\n", prefix, blkbuf); } @@ -132,6 +136,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", prefix, + !BP_IS_HOLE(bp) && bp->blk_birth >= spa_first_txg(zilog->zl_spa) ? 
"will claim" : "won't claim"); print_log_bp(bp, prefix); @@ -139,8 +144,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); - } - if (bp->blk_birth == 0) { bzero(buf, sizeof (buf)); (void) printf("%s<hole>\n", prefix); return; @@ -313,7 +316,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) if (verbose >= 5) { (void) strcpy(blkbuf, ", "); - sprintf_blkptr(blkbuf + strlen(blkbuf), bp); + snprintf_blkptr(blkbuf + strlen(blkbuf), + sizeof (blkbuf) - strlen(blkbuf), bp); } else { blkbuf[0] = '\0'; } @@ -361,7 +365,7 @@ dump_intent_log(zilog_t *zilog) int verbose = MAX(dump_opt['d'], dump_opt['i']); int i; - if (zh->zh_log.blk_birth == 0 || verbose < 1) + if (BP_IS_HOLE(&zh->zh_log) || verbose < 1) return; (void) printf("\n ZIL header: claim_txg %llu, " diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index 1d80023..c0ed974 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -277,6 +277,9 @@ zhack_do_feature_stat(int argc, char **argv) dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); + if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { + dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg"); + } dump_mos(spa); spa_close(spa, FTAG); @@ -313,7 +316,9 @@ zhack_do_feature_enable(int argc, char **argv) feature.fi_uname = "zhack"; feature.fi_mos = B_FALSE; feature.fi_can_readonly = B_FALSE; + feature.fi_activate_on_enable = B_FALSE; feature.fi_depends = nodeps; + feature.fi_feature = SPA_FEATURE_NONE; optind = 1; while ((c = getopt(argc, argv, "rmd:")) != -1) { @@ -371,7 +376,7 @@ feature_incr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; - VERIFY0(feature_get_refcount(spa, feature, &refcount)); + VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); 
feature_sync(spa, feature, refcount + 1, tx); spa_history_log_internal(spa, "zhack feature incr", tx, "guid=%s", feature->fi_guid); @@ -384,7 +389,7 @@ feature_decr_sync(void *arg, dmu_tx_t *tx) zfeature_info_t *feature = arg; uint64_t refcount; - VERIFY0(feature_get_refcount(spa, feature, &refcount)); + VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); feature_sync(spa, feature, refcount - 1, tx); spa_history_log_internal(spa, "zhack feature decr", tx, "guid=%s", feature->fi_guid); @@ -411,6 +416,7 @@ zhack_do_feature_ref(int argc, char **argv) feature.fi_mos = B_FALSE; feature.fi_desc = NULL; feature.fi_depends = nodeps; + feature.fi_feature = SPA_FEATURE_NONE; optind = 1; while ((c = getopt(argc, argv, "md")) != -1) { @@ -459,8 +465,8 @@ zhack_do_feature_ref(int argc, char **argv) if (decr) { uint64_t count; - if (feature_get_refcount(spa, &feature, &count) == 0 && - count != 0) { + if (feature_get_refcount_from_disk(spa, &feature, + &count) == 0 && count != 0) { fatal(spa, FTAG, "feature refcount already 0: %s", feature.fi_guid); } diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 index 33088b3..4df496d 100644 --- a/man/man5/zpool-features.5 +++ b/man/man5/zpool-features.5 @@ -295,5 +295,70 @@ this feature are destroyed. .RE +.sp +.ne 2 +.na +\fB\fBenabled_txg\fR\fR +.ad +.RS 4n +.TS +l l . +GUID com.delphix:enabled_txg +READ\-ONLY COMPATIBLE yes +DEPENDENCIES none +.TE + +Once this feature is enabled ZFS records the transaction group number +in which new features are enabled. This has no user-visible impact, +but other features may depend on this feature. + +This feature becomes \fBactive\fR as soon as it is enabled and will +never return to being \fBenabled\fB. + +.RE + +.sp +.ne 2 +.na +\fB\fBhole_birth\fR\fR +.ad +.RS 4n +.TS +l l . 
+GUID com.delphix:hole_birth +READ\-ONLY COMPATIBLE no +DEPENDENCIES enabled_txg +.TE + +This feature improves performance of incremental sends ("zfs send -i") +and receives for objects with many holes. The most common case of +hole-filled objects is zvols. + +An incremental send stream from snapshot \fBA\fR to snapshot \fBB\fR +contains information about every block that changed between \fBA\fR and +\fBB\fR. Blocks which did not change between those snapshots can be +identified and omitted from the stream using a piece of metadata called +the 'block birth time', but birth times are not recorded for holes (blocks +filled only with zeroes). Since holes created after \fBA\fR cannot be +distinguished from holes created before \fBA\fR, information about every +hole in the entire filesystem or zvol is included in the send stream. + +For workloads where holes are rare this is not a problem. However, when +incrementally replicating filesystems or zvols with many holes (for +example a zvol formatted with another filesystem) a lot of time will +be spent sending and receiving unnecessary information about holes that +already exist on the receiving side. + +Once the \fBhole_birth\fR feature has been enabled the block birth times +of all new holes will be recorded. Incremental sends between snapshots +created after this feature is enabled will use this new metadata to avoid +sending information about holes that already exist on the receiving side. + +This feature becomes \fBactive\fR as soon as it is enabled and will +never return to being \fBenabled\fB. + +.RE + + .SH "SEE ALSO" \fBzpool\fR(1M) |