summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsmh <smh@FreeBSD.org>2016-03-10 23:45:23 +0000
committersmh <smh@FreeBSD.org>2016-03-10 23:45:23 +0000
commitc9384328c9b7e2c5e13765a58182cbfb269133cc (patch)
tree56f2e0e27482386a5cf94967d32016e9fd6eb58d
parentcee61dc42661e3adf3f18e1b1791f03e13f3b540 (diff)
downloadFreeBSD-src-c9384328c9b7e2c5e13765a58182cbfb269133cc.zip
FreeBSD-src-c9384328c9b7e2c5e13765a58182cbfb269133cc.tar.gz
MFS r296629:
ZFS send fails to transmit some holes PR: 207714 Approved by: re (gjb) Sponsored by: Multiplay
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c8
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c49
2 files changed, 43 insertions, 14 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
index 6ca021e..2c9802f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
*/
@@ -50,6 +50,12 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
* reasonably sparse (at most 1/4 full). Look from the
* beginning once, but after that keep looking from here.
* If we can't find one, just keep going from here.
+ *
+ * Note that dmu_traverse depends on the behavior that we use
+ * multiple blocks of the dnode object before going back to
+ * reuse objects. Any change to this algorithm should preserve
+ * that property or find another solution to the issues
+ * described in traverse_visitbp.
*/
if (P2PHASE(object, L2_dnode_count) == 0) {
uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 2c718df..dd0644a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2015 Chunwei Chen. All rights reserved.
*/
@@ -63,6 +63,7 @@ typedef struct traverse_data {
uint64_t td_hole_birth_enabled_txg;
blkptr_cb_t *td_func;
void *td_arg;
+ boolean_t td_realloc_possible;
} traverse_data_t;
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
@@ -232,18 +233,30 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
if (bp->blk_birth == 0) {
/*
- * Since this block has a birth time of 0 it must be a
- * hole created before the SPA_FEATURE_HOLE_BIRTH
- * feature was enabled. If SPA_FEATURE_HOLE_BIRTH
- * was enabled before the min_txg for this traveral we
- * know the hole must have been created before the
- * min_txg for this traveral, so we can skip it. If
- * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
- * for this traveral we cannot tell if the hole was
- * created before or after the min_txg for this
- * traversal, so we cannot skip it.
+ * Since this block has a birth time of 0 it must be one of
+ * two things: a hole created before the
+ * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole
+ * which has always been a hole in an object.
+ *
+ * If a file is written sparsely, then the unwritten parts of
+ * the file were "always holes" -- that is, they have been
+ * holes since this object was allocated. However, we (and
+ * our callers) can not necessarily tell when an object was
+ * allocated. Therefore, if it's possible that this object
+ * was freed and then its object number reused, we need to
+ * visit all the holes with birth==0.
+ *
+ * If it isn't possible that the object number was reused,
+ * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote
+ * all the blocks we will visit as part of this traversal,
+ * then this hole must have always existed, so we can skip
+ * it. We visit blocks born after (exclusive) td_min_txg.
+ *
+ * Note that the meta-dnode cannot be reallocated.
*/
- if (td->td_hole_birth_enabled_txg < td->td_min_txg)
+ if ((!td->td_realloc_possible ||
+ zb->zb_object == DMU_META_DNODE_OBJECT) &&
+ td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0);
} else if (bp->blk_birth <= td->td_min_txg) {
return (0);
@@ -338,6 +351,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
objset_phys_t *osp = buf->b_data;
prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
+ /*
+ * See the block comment above for the goal of this variable.
+ * If the maxblkid of the meta-dnode is 0, then we know that
+ * we've never had more than DNODES_PER_BLOCK objects in the
+ * dataset, which means we can't have reused any object ids.
+ */
+ if (osp->os_meta_dnode.dn_maxblkid == 0)
+ td->td_realloc_possible = B_FALSE;
+
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
zb->zb_objset, DMU_GROUPUSED_OBJECT);
@@ -544,12 +566,13 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
td.td_pfd = &pd;
td.td_flags = flags;
td.td_paused = B_FALSE;
+ td.td_realloc_possible = (txg_start == 0 ? B_FALSE : B_TRUE);
if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
VERIFY(spa_feature_enabled_txg(spa,
SPA_FEATURE_HOLE_BIRTH, &td.td_hole_birth_enabled_txg));
} else {
- td.td_hole_birth_enabled_txg = 0;
+ td.td_hole_birth_enabled_txg = UINT64_MAX;
}
pd.pd_flags = flags;
OpenPOWER on IntegriCloud