summary | refs | log | tree | commit | diff | stats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
diff options
context:
space:
mode:
author: mav <mav@FreeBSD.org> 2015-11-13 09:30:17 +0000
committer: mav <mav@FreeBSD.org> 2015-11-13 09:30:17 +0000
commit: 76f98aa10ac1d58655f47588910862da714725e1 (patch)
tree: 256fee4ecc37c986528c3db137032619de79e32d /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
parent: 49d28de1a8839687816d368b8ce18808d9643c72 (diff)
download: FreeBSD-src-76f98aa10ac1d58655f47588910862da714725e1.zip
download: FreeBSD-src-76f98aa10ac1d58655f47588910862da714725e1.tar.gz
MFC r289362, r289445: 2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
Author: Matthew Ahrens <mahrens@delphix.com>

illumos/illumos-gate@9c3fd1216fa7fb02cfbc78a2518a686d54b48ab8

For more info, see:
- slides http://www.slideshare.net/MatthewAhrens/openzfs-send-and-receive
- video https://www.youtube.com/watch?v=iY44jPMvxog
- manpage changes (for zfs receive -s and zfs send -t)
- upcoming talk at the OpenZFS Developer Summit

The TL;DR is:
- Use "zfs receive -s" to save the partially received state on failure.
- On failure, get the receive token with "zfs get receive_resume_token <fs>".
- Resume the send with "zfs send -t <token_value>".

Relnotes: yes
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c | 82
1 files changed, 45 insertions, 37 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 151d04c..2c718df 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -48,6 +48,7 @@ typedef struct prefetch_data {
int pd_flags;
boolean_t pd_cancel;
boolean_t pd_exited;
+ zbookmark_phys_t pd_resume;
} prefetch_data_t;
typedef struct traverse_data {
@@ -307,59 +308,52 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_flags_t flags = ARC_FLAG_WAIT;
int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
- dnode_phys_t *cdnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- cdnp = buf->b_data;
+ dnode_phys_t *child_dnp = buf->b_data;
for (i = 0; i < epb; i++) {
- prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ prefetch_dnode_metadata(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
}
/* recursively visitbp() blocks below this */
for (i = 0; i < epb; i++) {
- err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ err = traverse_dnode(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
if (err != 0)
break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
- objset_phys_t *osp;
- dnode_phys_t *mdnp, *gdnp, *udnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- osp = buf->b_data;
- mdnp = &osp->os_meta_dnode;
- gdnp = &osp->os_groupused_dnode;
- udnp = &osp->os_userused_dnode;
-
- prefetch_dnode_metadata(td, mdnp, zb->zb_objset,
+ objset_phys_t *osp = buf->b_data;
+ prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- prefetch_dnode_metadata(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
- prefetch_dnode_metadata(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
- err = traverse_dnode(td, mdnp, zb->zb_objset,
+ err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
}
@@ -391,9 +385,15 @@ post:
* Set the bookmark to the first level-0 block that we need
* to visit. This way, the resuming code does not need to
* deal with resuming from indirect blocks.
+ *
+ * Note, if zb_level <= 0, dnp may be NULL, so we don't want
+ * to dereference it.
*/
- td->td_resume->zb_blkid = zb->zb_blkid <<
- (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+ td->td_resume->zb_blkid = zb->zb_blkid;
+ if (zb->zb_level > 0) {
+ td->td_resume->zb_blkid <<= zb->zb_level *
+ (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
+ }
td->td_paused = B_TRUE;
}
@@ -425,6 +425,10 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
int j, err = 0;
zbookmark_phys_t czb;
+ if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL &&
+ object < td->td_resume->zb_object)
+ return (0);
+
if (td->td_flags & TRAVERSE_PRE) {
SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
ZB_DNODE_BLKID);
@@ -501,6 +505,7 @@ traverse_prefetch_thread(void *arg)
td.td_func = traverse_prefetcher;
td.td_arg = td_main->td_pfd;
td.td_pfd = NULL;
+ td.td_resume = &td_main->td_pfd->pd_resume;
SET_BOOKMARK(&czb, td.td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
@@ -529,12 +534,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
ASSERT(ds == NULL || objset == ds->ds_object);
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
- /*
- * The data prefetching mechanism (the prefetch thread) is incompatible
- * with resuming from a bookmark.
- */
- ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
-
td.td_spa = spa;
td.td_objset = objset;
td.td_rootbp = rootbp;
@@ -554,6 +553,8 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
}
pd.pd_flags = flags;
+ if (resume != NULL)
+ pd.pd_resume = *resume;
mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
@@ -601,11 +602,19 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
* in syncing context).
*/
int
-traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
- blkptr_cb_t func, void *arg)
+traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume,
+ int flags, blkptr_cb_t func, void *arg)
{
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
- &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg));
+ &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
+}
+
+int
+traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
+ int flags, blkptr_cb_t func, void *arg)
+{
+ return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg));
}
int
@@ -625,7 +634,6 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
blkptr_cb_t func, void *arg)
{
int err;
- uint64_t obj;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
boolean_t hard = (flags & TRAVERSE_HARD);
@@ -637,8 +645,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
return (err);
/* visit each dataset */
- for (obj = 1; err == 0;
- err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
+ for (uint64_t obj = 1; err == 0;
+ err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
OpenPOWER on IntegriCloud