summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoravg <avg@FreeBSD.org>2013-12-10 13:33:56 +0000
committeravg <avg@FreeBSD.org>2013-12-10 13:33:56 +0000
commit888a2df901616fb2900279c75580de3d4bc93278 (patch)
tree86a07fe33f3e290f9ced3b3f76f78d8cc704d4fd
parent58b9ad4f2e737ff922ff49ae564a32ee94c0ff6b (diff)
downloadFreeBSD-src-888a2df901616fb2900279c75580de3d4bc93278.zip
FreeBSD-src-888a2df901616fb2900279c75580de3d4bc93278.tar.gz
4370 avoid transmitting holes during zfs send
4371 DMU code clean up illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f
-rw-r--r--cmd/zdb/zdb.c83
-rw-r--r--cmd/zdb/zdb_il.c14
-rw-r--r--cmd/zhack/zhack.c14
-rw-r--r--man/man5/zpool-features.565
4 files changed, 130 insertions, 46 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 1723342..a61cae3 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -764,7 +764,7 @@ dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
if (ddp->ddp_phys_birth == 0)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
- sprintf_blkptr(blkbuf, &blk);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
(void) printf("index %llx refcnt %llu %s %s\n",
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
types[p], blkbuf);
@@ -1022,31 +1022,39 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
}
static void
-sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
+snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
{
const dva_t *dva = bp->blk_dva;
int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
if (dump_opt['b'] >= 6) {
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, buflen, bp);
return;
}
blkbuf[0] = '\0';
for (int i = 0; i < ndvas; i++)
- (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
+ (void) snprintf(blkbuf + strlen(blkbuf),
+ buflen - strlen(blkbuf), "%llu:%llx:%llx ",
(u_longlong_t)DVA_GET_VDEV(&dva[i]),
(u_longlong_t)DVA_GET_OFFSET(&dva[i]),
(u_longlong_t)DVA_GET_ASIZE(&dva[i]));
- (void) sprintf(blkbuf + strlen(blkbuf),
- "%llxL/%llxP F=%llu B=%llu/%llu",
- (u_longlong_t)BP_GET_LSIZE(bp),
- (u_longlong_t)BP_GET_PSIZE(bp),
- (u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_birth,
- (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
+ if (BP_IS_HOLE(bp)) {
+ (void) snprintf(blkbuf + strlen(blkbuf),
+ buflen - strlen(blkbuf), "B=%llu",
+ (u_longlong_t)bp->blk_birth);
+ } else {
+ (void) snprintf(blkbuf + strlen(blkbuf),
+ buflen - strlen(blkbuf),
+ "%llxL/%llxP F=%llu B=%llu/%llu",
+ (u_longlong_t)BP_GET_LSIZE(bp),
+ (u_longlong_t)BP_GET_PSIZE(bp),
+ (u_longlong_t)bp->blk_fill,
+ (u_longlong_t)bp->blk_birth,
+ (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
+ }
}
static void
@@ -1071,7 +1079,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
}
}
- sprintf_blkptr_compact(blkbuf, bp);
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
(void) printf("%s\n", blkbuf);
}
@@ -1086,7 +1094,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
print_indirect(bp, zb, dnp);
- if (BP_GET_LEVEL(bp) > 0) {
+ if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
uint32_t flags = ARC_WAIT;
int i;
blkptr_t *cbp;
@@ -1211,7 +1219,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
zdb_nicenum(ds->ds_compressed_bytes, compressed);
zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
zdb_nicenum(ds->ds_unique_bytes, unique);
- sprintf_blkptr(blkbuf, &ds->ds_bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
(void) printf("\t\tdir_obj = %llu\n",
(u_longlong_t)ds->ds_dir_obj);
@@ -1256,7 +1264,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
char blkbuf[BP_SPRINTF_LEN];
if (bp->blk_birth != 0) {
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("\t%s\n", blkbuf);
}
return (0);
@@ -1294,7 +1302,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
char blkbuf[BP_SPRINTF_LEN];
ASSERT(bp->blk_birth != 0);
- sprintf_blkptr_compact(blkbuf, bp);
+ snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
(void) printf("\t%s\n", blkbuf);
return (0);
}
@@ -1793,8 +1801,9 @@ dump_dir(objset_t *os)
zdb_nicenum(refdbytes, numbuf);
if (verbosity >= 4) {
- (void) sprintf(blkbuf, ", rootbp ");
- (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
+ (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
+ (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
+ sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
} else {
blkbuf[0] = '\0';
}
@@ -1824,7 +1833,7 @@ dump_dir(objset_t *os)
if (verbosity < 2)
return;
- if (os->os_rootbp->blk_birth == 0)
+ if (BP_IS_HOLE(os->os_rootbp))
return;
dump_object(os, 0, verbosity, &print_header);
@@ -1865,7 +1874,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
if (dump_opt['u'] >= 3) {
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, &ub->ub_rootbp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
}
(void) printf(footer ? footer : "");
@@ -2202,7 +2211,7 @@ zdb_blkptr_done(zio_t *zio)
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
else
blkbuf[0] = '\0';
@@ -2224,11 +2233,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
zdb_cb_t *zcb = arg;
- char blkbuf[BP_SPRINTF_LEN];
dmu_object_type_t type;
boolean_t is_metadata;
- if (bp == NULL)
+ if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
+ char blkbuf[BP_SPRINTF_LEN];
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
+ (void) printf("objset %llu object %llu "
+ "level %lld offset 0x%llx %s\n",
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (longlong_t)zb->zb_level,
+ (u_longlong_t)blkid2offset(dnp, bp, zb),
+ blkbuf);
+ }
+
+ if (BP_IS_HOLE(bp))
return (0);
type = BP_GET_TYPE(bp);
@@ -2259,17 +2279,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zcb->zcb_readfails = 0;
- if (dump_opt['b'] >= 5) {
- sprintf_blkptr(blkbuf, bp);
- (void) printf("objset %llu object %llu "
- "level %lld offset 0x%llx %s\n",
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (longlong_t)zb->zb_level,
- (u_longlong_t)blkid2offset(dnp, bp, zb),
- blkbuf);
- }
-
if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
gethrtime() > zcb->zcb_lastprint + NANOSEC) {
uint64_t now = gethrtime();
@@ -2426,7 +2435,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
if (dump_opt['b'] >= 5) {
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("[%s] %s\n",
"deferred free", blkbuf);
}
@@ -2673,7 +2682,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;
- if (bp == NULL)
+ if (BP_IS_HOLE(bp))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@@ -2840,7 +2849,7 @@ zdb_print_blkptr(blkptr_t *bp, int flags)
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("%s\n", blkbuf);
}
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index a0ed985..17f7ad3 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -24,6 +24,10 @@
*/
/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ */
+
+/*
* Print intent log header and statistics.
*/
@@ -47,7 +51,7 @@ print_log_bp(const blkptr_t *bp, const char *prefix)
{
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, bp);
+ snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
(void) printf("%s%s\n", prefix, blkbuf);
}
@@ -132,6 +136,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("%shas blkptr, %s\n", prefix,
+ !BP_IS_HOLE(bp) &&
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, prefix);
@@ -139,8 +144,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
- }
- if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
(void) printf("%s<hole>\n", prefix);
return;
@@ -313,7 +316,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
if (verbose >= 5) {
(void) strcpy(blkbuf, ", ");
- sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
+ snprintf_blkptr(blkbuf + strlen(blkbuf),
+ sizeof (blkbuf) - strlen(blkbuf), bp);
} else {
blkbuf[0] = '\0';
}
@@ -361,7 +365,7 @@ dump_intent_log(zilog_t *zilog)
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int i;
- if (zh->zh_log.blk_birth == 0 || verbose < 1)
+ if (BP_IS_HOLE(&zh->zh_log) || verbose < 1)
return;
(void) printf("\n ZIL header: claim_txg %llu, "
diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c
index 1d80023..c0ed974 100644
--- a/cmd/zhack/zhack.c
+++ b/cmd/zhack/zhack.c
@@ -277,6 +277,9 @@ zhack_do_feature_stat(int argc, char **argv)
dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
+ if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
+ dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
+ }
dump_mos(spa);
spa_close(spa, FTAG);
@@ -313,7 +316,9 @@ zhack_do_feature_enable(int argc, char **argv)
feature.fi_uname = "zhack";
feature.fi_mos = B_FALSE;
feature.fi_can_readonly = B_FALSE;
+ feature.fi_activate_on_enable = B_FALSE;
feature.fi_depends = nodeps;
+ feature.fi_feature = SPA_FEATURE_NONE;
optind = 1;
while ((c = getopt(argc, argv, "rmd:")) != -1) {
@@ -371,7 +376,7 @@ feature_incr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
- VERIFY0(feature_get_refcount(spa, feature, &refcount));
+ VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount + 1, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
"guid=%s", feature->fi_guid);
@@ -384,7 +389,7 @@ feature_decr_sync(void *arg, dmu_tx_t *tx)
zfeature_info_t *feature = arg;
uint64_t refcount;
- VERIFY0(feature_get_refcount(spa, feature, &refcount));
+ VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
feature_sync(spa, feature, refcount - 1, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
"guid=%s", feature->fi_guid);
@@ -411,6 +416,7 @@ zhack_do_feature_ref(int argc, char **argv)
feature.fi_mos = B_FALSE;
feature.fi_desc = NULL;
feature.fi_depends = nodeps;
+ feature.fi_feature = SPA_FEATURE_NONE;
optind = 1;
while ((c = getopt(argc, argv, "md")) != -1) {
@@ -459,8 +465,8 @@ zhack_do_feature_ref(int argc, char **argv)
if (decr) {
uint64_t count;
- if (feature_get_refcount(spa, &feature, &count) == 0 &&
- count != 0) {
+ if (feature_get_refcount_from_disk(spa, &feature,
+ &count) == 0 && count != 0) {
fatal(spa, FTAG, "feature refcount already 0: %s",
feature.fi_guid);
}
diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5
index 33088b3..4df496d 100644
--- a/man/man5/zpool-features.5
+++ b/man/man5/zpool-features.5
@@ -295,5 +295,70 @@ this feature are destroyed.
.RE
+.sp
+.ne 2
+.na
+\fB\fBenabled_txg\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.delphix:enabled_txg
+READ\-ONLY COMPATIBLE yes
+DEPENDENCIES none
+.TE
+
+Once this feature is enabled ZFS records the transaction group number
+in which new features are enabled. This has no user-visible impact,
+but other features may depend on this feature.
+
+This feature becomes \fBactive\fR as soon as it is enabled and will
+never return to being \fBenabled\fB.
+
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBhole_birth\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.delphix:hole_birth
+READ\-ONLY COMPATIBLE no
+DEPENDENCIES enabled_txg
+.TE
+
+This feature improves performance of incremental sends ("zfs send -i")
+and receives for objects with many holes. The most common case of
+hole-filled objects is zvols.
+
+An incremental send stream from snapshot \fBA\fR to snapshot \fBB\fR
+contains information about every block that changed between \fBA\fR and
+\fBB\fR. Blocks which did not change between those snapshots can be
+identified and omitted from the stream using a piece of metadata called
+the 'block birth time', but birth times are not recorded for holes (blocks
+filled only with zeroes). Since holes created after \fBA\fR cannot be
+distinguished from holes created before \fBA\fR, information about every
+hole in the entire filesystem or zvol is included in the send stream.
+
+For workloads where holes are rare this is not a problem. However, when
+incrementally replicating filesystems or zvols with many holes (for
+example a zvol formatted with another filesystem) a lot of time will
+be spent sending and receiving unnecessary information about holes that
+already exist on the receiving side.
+
+Once the \fBhole_birth\fR feature has been enabled the block birth times
+of all new holes will be recorded. Incremental sends between snapshots
+created after this feature is enabled will use this new metadata to avoid
+sending information about holes that already exist on the receiving side.
+
+This feature becomes \fBactive\fR as soon as it is enabled and will
+never return to being \fBenabled\fB.
+
+.RE
+
+
.SH "SEE ALSO"
\fBzpool\fR(1M)
OpenPOWER on IntegriCloud