From 99cadf9eedff63c2d1d6b1a5296b21ca12ac2aa0 Mon Sep 17 00:00:00 2001 From: mav Date: Wed, 12 Aug 2015 22:41:06 +0000 Subject: MFV r286704: 5960 zfs recv should prefetch indirect blocks 5925 zfs receive -o origin= Reviewed by: Prakash Surya Reviewed by: Matthew Ahrens Author: Paul Dagnelie While running 'zfs recv' we noticed that every 128th 8K block required a read. We were seeing that restore_write() was calling dmu_tx_hold_write() and the indirect block was not cached. We should prefetch upcoming indirect blocks to avoid having to go to disk and blocking the restore_write(). Allow an incremental send stream to be received as a clone, even if the stream does not mark it as a clone. --- cddl/contrib/opensolaris/cmd/zdb/zdb.c | 5 +- cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 8 ++++ cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 29 +++++++++--- cddl/contrib/opensolaris/cmd/ztest/ztest.c | 9 ++-- .../contrib/opensolaris/lib/libzfs/common/libzfs.h | 4 +- .../opensolaris/lib/libzfs/common/libzfs_pool.c | 4 +- .../lib/libzfs/common/libzfs_sendrecv.c | 55 +++++++++++++++------- .../lib/libzpool/common/sys/zfs_context.h | 12 ++++- 8 files changed, 93 insertions(+), 33 deletions(-) (limited to 'cddl') diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c index 7c5df0b..75adc71 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c @@ -2428,6 +2428,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type; boolean_t is_metadata; + if (bp == NULL) + return (0); + if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); @@ -2917,7 +2920,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 index 2254de1..4da6d0b 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 @@ -191,11 +191,13 @@ .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnFu +.Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnFu .Op Fl d | e +.Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem .Nm .Cm allow @@ -2705,6 +2707,7 @@ feature. .Nm .Cm receive Ns | Ns Cm recv .Op Fl vnFu +.Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc .It Xo @@ -2712,6 +2715,7 @@ feature. .Cm receive Ns | Ns Cm recv .Op Fl vnFu .Op Fl d | e +.Op Fl o Sy origin Ns = Ns Ar snapshot .Ar filesystem .Xc .Pp @@ -2796,6 +2800,10 @@ receive operation. Do not actually receive the stream. This can be useful in conjunction with the .Fl v option to verify the name the receive operation would use. +.It Fl o Sy origin Ns = Ns Ar snapshot +Forces the stream to be received as a clone of the given snapshot. +This is only valid if the stream is an incremental stream whose source +is the same as the provided origin. .It Fl F Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c index 389b248..9d80b9b 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c @@ -264,8 +264,9 @@ get_usage(zfs_help_t idx) return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive|recv [-vnFu] \n" - "\treceive|recv [-vnFu] [-d | -e] \n")); + "snapshot>\n" + "\treceive|recv [-vnFu] [-o origin=] [-d | -e] " + "\n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" @@ -791,7 +792,7 @@ zfs_do_create(int argc, char **argv) nomem(); break; case 'o': - if (parseprop(props, optarg)) + if (parseprop(props, optarg) != 0) goto error; break; case 's': @@ -3659,7 +3660,7 @@ zfs_do_snapshot(int argc, char **argv) while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': - if (parseprop(props, optarg)) + if (parseprop(props, optarg) != 0) return (1); break; case 'r': @@ -3918,10 +3919,19 @@ zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; + nvlist_t *props; + nvpair_t *nvp = NULL; + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ - while ((c = getopt(argc, argv, ":denuvF")) != -1) { + while ((c = getopt(argc, argv, ":o:denuvF")) != -1) { switch (c) { + case 'o': + if (parseprop(props, optarg) != 0) + return (1); + break; case 'd': flags.isprefix = B_TRUE; break; @@ -3966,6 +3976,13 @@ zfs_do_receive(int argc, char **argv) usage(B_FALSE); } + while ((nvp = nvlist_next_nvpair(props, nvp))) { + if (strcmp(nvpair_name(nvp), "origin") != 0) { + (void) fprintf(stderr, gettext("invalid option")); + usage(B_FALSE); + } + } + if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " @@ -3974,7 +3991,7 @@ zfs_do_receive(int argc, char **argv) return (1); } - err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL); + err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); return (err != 0); } diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c index 0695834..52c7284 100644 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c @@ -3586,7 +3586,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(2 * width - 1); - dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); + dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, + ZIO_PRIORITY_SYNC_READ); /* * Pick a random index and compute the offsets into packobj and bigobj. @@ -5705,8 +5706,10 @@ ztest_run(ztest_shared_t *zs) * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ - for (uint64_t object = 1; object < 50; object++) - dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); + for (uint64_t object = 1; object < 50; object++) { + dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, + ZIO_PRIORITY_SYNC_READ); + } spa_close(spa, FTAG); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index fbfaab1..968732e 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -668,8 +668,8 @@ typedef struct recvflags { boolean_t nomount; } recvflags_t; -extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, - int, avl_tree_t *); +extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, + recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c index b9db421..5c78e81 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -3535,7 +3535,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, } static int -zbookmark_compare(const void *a, const void *b) +zbookmark_mem_compare(const void *a, const void *b) { return (memcmp(a, b, sizeof (zbookmark_phys_t))); } @@ -3598,7 +3598,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) zc.zc_nvlist_dst_size; count -= zc.zc_nvlist_dst_size; - qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare); + qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c index bd3832e..f6efa97 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -64,8 +64,9 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); /* We need to use something for ENODATA. */ #define ENODATA EIDRM -static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, - int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); +static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, + recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, + uint64_t *); static const zio_cksum_t zero_cksum = { 0 }; @@ -2498,7 +2499,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ - error = zfs_receive_impl(hdl, destname, flags, fd, + error = zfs_receive_impl(hdl, destname, NULL, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep); if (error == ENODATA) { @@ -2631,9 +2632,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) */ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, - recvflags_t *flags, dmu_replay_record_t *drr, - dmu_replay_record_t *drr_noswap, const char *sendfs, - nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr, + dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, + avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { zfs_cmd_t zc = { 0 }; @@ -2798,10 +2799,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); + } else if (originsnap) { + (void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN); + if (flags->verbose) + (void) printf("using provided clone origin %s\n", + zc.zc_string); } stream_wantsnewfs = (drrb->drr_fromguid == 0 || - (drrb->drr_flags & DRR_FLAG_CLONE)); + (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap); if (stream_wantsnewfs) { /* @@ -3179,9 +3185,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } static int -zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, - int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, - char **top_zfs, int cleanup_fd, uint64_t *action_handlep) +zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, + const char *originsnap, recvflags_t *flags, int infd, const char *sendfs, + nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, + uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; @@ -3200,6 +3207,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } + if (originsnap && + !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs " + "(%s) does not exist"), originsnap); + return (zfs_error(hdl, EZFS_NOENT, errbuf)); + } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, @@ -3272,14 +3285,14 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, *cp = '\0'; sendfs = nonpackage_sendfs; } - return (zfs_receive_one(hdl, infd, tosnap, flags, - &drr, &drr_noswap, sendfs, stream_nv, stream_avl, - top_zfs, cleanup_fd, action_handlep)); + return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, + &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, + cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); - return (zfs_receive_package(hdl, infd, tosnap, flags, - &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); + return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, + &zcksum, top_zfs, cleanup_fd, action_handlep)); } } @@ -3290,18 +3303,24 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, * (-1 will override -2). */ int -zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, - int infd, avl_tree_t *stream_avl) +zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, + recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; + char *originsnap = NULL; + if (props) { + err = nvlist_lookup_string(props, "origin", &originsnap); + if (err && err != ENOENT) + return (err); + } cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); VERIFY(cleanup_fd >= 0); - err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, + err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle); VERIFY(0 == close(cleanup_fd)); diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h index 493be33..15ad5ed 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h +++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h @@ -135,8 +135,18 @@ extern int aok; /* * DTrace SDT probes have different signatures in userland than they do in - * kernel. If they're being used in kernel code, re-define them out of + * the kernel. If they're being used in kernel code, re-define them out of * existence for their counterparts in libzpool. + * + * Here's an example of how to use the set-error probes in userland: + * zfs$target:::set-error /arg0 == EBUSY/ {stack();} + * + * Here's an example of how to use DTRACE_PROBE probes in userland: + * If there is a probe declared as follows: + * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn); + * Then you can use it as follows: + * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/ + * {printf("%u %p\n", arg1, arg2);} */ #ifdef DTRACE_PROBE -- cgit v1.1