diff options
14 files changed, 327 insertions, 98 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 index c09448a..d021e25 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 @@ -19,14 +19,15 @@ .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright 2011, Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2011, Justin T. Gibbs <gibbs@FreeBSD.org> -.\" Copyright (c) 2013 by Delphix. All Rights Reserved. .\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org> +.\" Copyright (c) 2013 by Delphix. All Rights Reserved. +.\" Copyright 2017 Nexenta Systems, Inc. +.\" Copyright (c) 2017 Datto Inc. .\" .\" $FreeBSD$ .\" -.Dd July 26, 2014 +.Dd September 08, 2017 .Dt ZPOOL 8 .Os .Sh NAME @@ -153,7 +154,7 @@ .Op Ar new_device .Nm .Cm scrub -.Op Fl s +.Op Fl s | Fl p .Ar pool ... .Nm .Cm set @@ -1543,43 +1544,54 @@ manner. .It Xo .Nm .Cm scrub -.Op Fl s +.Op Fl s | Fl p .Ar pool ... .Xc .Pp -Begins a scrub. The scrub examines all data in the specified pools to verify -that it checksums correctly. For replicated (mirror or -.No raidz ) -devices, -.Tn ZFS -automatically repairs any damage discovered during the scrub. The -.Qq Nm Cm status +Begins a scrub or resumes a paused scrub. +The scrub examines all data in the specified pools to verify that it checksums +correctly. +For replicated +.Pq mirror or raidz +devices, ZFS automatically repairs any damage discovered during the scrub. +The +.Nm zpool Cm status command reports the progress of the scrub and summarizes the results of the scrub upon completion. .Pp -Scrubbing and resilvering are very similar operations. The difference is that -resilvering only examines data that -.Tn ZFS -knows to be out of date (for example, when attaching a new device to a mirror -or replacing an existing device), whereas scrubbing examines all data to -discover silent errors due to hardware faults or disk failure. -.Pp -Because scrubbing and resilvering are -.Tn I/O Ns -intensive -operations, -.Tn ZFS -only allows one at a time. If a scrub is already in progress, the -.Qq Nm Cm scrub -command returns an error. To start a new scrub, you have to stop the old scrub -with the -.Qq Nm Cm scrub Fl s -command first. If a resilver is in progress, -.Tn ZFS -does not allow a scrub to be started until the resilver completes. -.Bl -tag -width indent +Scrubbing and resilvering are very similar operations. +The difference is that resilvering only examines data that ZFS knows to be out +of date +.Po +for example, when attaching a new device to a mirror or replacing an existing +device +.Pc , +whereas scrubbing examines all data to discover silent errors due to hardware +faults or disk failure. +.Pp +Because scrubbing and resilvering are I/O-intensive operations, ZFS only allows +one at a time. +If a scrub is paused, the +.Nm zpool Cm scrub +resumes it. +If a resilver is in progress, ZFS does not allow a scrub to be started until the +resilver completes. +.Bl -tag -width Ds .It Fl s Stop scrubbing. .El +.Bl -tag -width Ds +.It Fl p +Pause scrubbing. +Scrub pause state and progress are periodically synced to disk. +If the system is restarted or pool is exported during a paused scrub, +even after import, scrub will remain paused until it is resumed. +Once resumed the scrub will pick up from the place where it was last +checkpointed to disk. +To resume a paused scrub issue +.Nm zpool Cm scrub +again. +.El .It Xo .Nm .Cm set diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c index e2dc24c..572265f 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c @@ -27,6 +27,7 @@ * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>. * Copyright 2016 Nexenta Systems, Inc. + * Copyright (c) 2017 Datto Inc. */ #include <solaris.h> @@ -252,7 +253,7 @@ get_usage(zpool_help_t idx) case HELP_REOPEN: return (gettext("\treopen <pool>\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s] <pool> ...\n")); + return (gettext("\tscrub [-s | -p] <pool> ...\n")); case HELP_STATUS: return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval " "[count]]\n")); @@ -3825,6 +3826,7 @@ typedef struct scrub_cbdata { int cb_type; int cb_argc; char **cb_argv; + pool_scrub_cmd_t cb_scrub_cmd; } scrub_cbdata_t; int @@ -3842,15 +3844,16 @@ scrub_callback(zpool_handle_t *zhp, void *data) return (1); } - err = zpool_scan(zhp, cb->cb_type); + err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); return (err != 0); } /* - * zpool scrub [-s] <pool> ... + * zpool scrub [-s | -p] <pool> ... * * -s Stop. Stops any in-progress scrub. + * -p Pause. Pause in-progress scrub. */ int zpool_do_scrub(int argc, char **argv) @@ -3859,13 +3862,17 @@ zpool_do_scrub(int argc, char **argv) scrub_cbdata_t cb; cb.cb_type = POOL_SCAN_SCRUB; + cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; /* check options */ - while ((c = getopt(argc, argv, "s")) != -1) { + while ((c = getopt(argc, argv, "sp")) != -1) { switch (c) { case 's': cb.cb_type = POOL_SCAN_NONE; break; + case 'p': + cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -3873,6 +3880,13 @@ zpool_do_scrub(int argc, char **argv) } } + if (cb.cb_type == POOL_SCAN_NONE && + cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { + (void) fprintf(stderr, gettext("invalid option combination: " + "-s and -p are mutually exclusive\n")); + usage(B_FALSE); + } + cb.cb_argc = argc; cb.cb_argv = argv; argc -= optind; @@ -3901,7 +3915,7 @@ typedef struct status_cbdata { void print_scan_status(pool_scan_stat_t *ps) { - time_t start, end; + time_t start, end, pause; uint64_t elapsed, mins_left, hours_left; uint64_t pass_exam, examined, total; uint_t rate; @@ -3919,6 +3933,7 @@ print_scan_status(pool_scan_stat_t *ps) start = ps->pss_start_time; end = ps->pss_end_time; + pause = ps->pss_pass_scrub_pause; zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); assert(ps->pss_func == POOL_SCAN_SCRUB || @@ -3961,8 +3976,17 @@ print_scan_status(pool_scan_stat_t *ps) * Scan is in progress. */ if (ps->pss_func == POOL_SCAN_SCRUB) { - (void) printf(gettext("scrub in progress since %s"), - ctime(&start)); + if (pause == 0) { + (void) printf(gettext("scrub in progress since %s"), + ctime(&start)); + } else { + char buf[32]; + struct tm *p = localtime(&pause); + (void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p); + (void) printf(gettext("scrub paused since %s\n"), buf); + (void) printf(gettext("\tscrub started on %s"), + ctime(&start)); + } } else if (ps->pss_func == POOL_SCAN_RESILVER) { (void) printf(gettext("resilver in progress since %s"), ctime(&start)); @@ -3974,6 +3998,7 @@ print_scan_status(pool_scan_stat_t *ps) /* elapsed time for this pass */ elapsed = time(NULL) - ps->pss_pass_start; + elapsed -= ps->pss_pass_scrub_spent_paused; elapsed = elapsed ? elapsed : 1; pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; rate = pass_exam / elapsed; @@ -3983,19 +4008,25 @@ print_scan_status(pool_scan_stat_t *ps) zfs_nicenum(examined, examined_buf, sizeof (examined_buf)); zfs_nicenum(total, total_buf, sizeof (total_buf)); - zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); /* * do not print estimated time if hours_left is more than 30 days + * or we have a paused scrub */ - (void) printf(gettext(" %s scanned out of %s at %s/s"), - examined_buf, total_buf, rate_buf); - if (hours_left < (30 * 24)) { - (void) printf(gettext(", %lluh%um to go\n"), - (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + if (pause == 0) { + zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); + (void) printf(gettext("\t%s scanned out of %s at %s/s"), + examined_buf, total_buf, rate_buf); + if (hours_left < (30 * 24)) { + (void) printf(gettext(", %lluh%um to go\n"), + (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + } else { + (void) printf(gettext( + ", (scan is slow, no estimated time)\n")); + } } else { - (void) printf(gettext( - ", (scan is slow, no estimated time)\n")); + (void) printf(gettext("\t%s scanned out of %s\n"), + examined_buf, total_buf); } if (ps->pss_func == POOL_SCAN_RESILVER) { diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index 937e9b2..f2a9ebf 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -28,6 +28,7 @@ * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. + * Copyright (c) 2017 Datto Inc. */ #ifndef _LIBZFS_H @@ -129,6 +130,7 @@ typedef enum zfs_error { EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ + EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_UNKNOWN } zfs_error_t; @@ -241,7 +243,7 @@ typedef struct splitflags { /* * Functions to manipulate pool and vdev state */ -extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); +extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_reopen(zpool_handle_t *); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c index af1767f..f2efcba 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -25,6 +25,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> + * Copyright (c) 2017 Datto Inc. */ #include <sys/types.h> @@ -1841,22 +1842,39 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, * Scan the pool. */ int -zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) +zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) { zfs_cmd_t zc = { 0 }; char msg[1024]; + int err; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = func; + zc.zc_flags = cmd; + + if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0) + return (0); + + err = errno; - if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || - (errno == ENOENT && func != POOL_SCAN_NONE)) + /* ECANCELED on a scrub means we resumed a paused scrub */ + if (err == ECANCELED && func == POOL_SCAN_SCRUB && + cmd == POOL_SCRUB_NORMAL) + return (0); + + if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL) return (0); if (func == POOL_SCAN_SCRUB) { - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); + if (cmd == POOL_SCRUB_PAUSE) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot pause scrubbing %s"), zc.zc_name); + } else { + assert(cmd == POOL_SCRUB_NORMAL); + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot scrub %s"), zc.zc_name); + } } else if (func == POOL_SCAN_NONE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), @@ -1865,7 +1883,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) assert(!"unexpected result"); } - if (errno == EBUSY) { + if (err == EBUSY) { nvlist_t *nvroot; pool_scan_stat_t *ps = NULL; uint_t psc; @@ -1874,14 +1892,18 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); - if (ps && ps->pss_func == POOL_SCAN_SCRUB) - return (zfs_error(hdl, EZFS_SCRUBBING, msg)); - else + if (ps && ps->pss_func == POOL_SCAN_SCRUB) { + if (cmd == POOL_SCRUB_PAUSE) + return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg)); + else + return (zfs_error(hdl, EZFS_SCRUBBING, msg)); + } else { return (zfs_error(hdl, EZFS_RESILVERING, msg)); - } else if (errno == ENOENT) { + } + } else if (err == ENOENT) { return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); } else { - return (zpool_standard_error(hdl, errno, msg)); + return (zpool_standard_error(hdl, err, msg)); } } diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c index b60e25e..a2ae8b0 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c @@ -24,6 +24,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> + * Copyright (c) 2017 Datto Inc. */ /* @@ -224,6 +225,9 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_POSTSPLIT_ONLINE: return (dgettext(TEXT_DOMAIN, "disk was split from this pool " "into a new one")); + case EZFS_SCRUB_PAUSED: + return (dgettext(TEXT_DOMAIN, "scrub is paused; " + "use 'zpool scrub' to resume")); case EZFS_SCRUBBING: return (dgettext(TEXT_DOMAIN, "currently scrubbing; " "use 'zpool scrub -s' to cancel current scrub")); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c index 19e97cb..0bcfc00 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ #include <sys/bpobj.h> @@ -212,6 +213,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, mutex_enter(&bpo->bpo_lock); + if (!bpobj_hasentries(bpo)) + goto out; + if (free) dmu_buf_will_dirty(bpo->bpo_dbuf, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index 639eefe..ac1ac50 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -22,6 +22,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2017 Datto Inc. */ #include <sys/dsl_scan.h> @@ -310,6 +311,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) scn->scn_phys.scn_queue_obj = 0; } + scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; + /* * If we were "restarted" from a stopped state, don't bother * with anything else. @@ -393,6 +396,91 @@ dsl_scan_cancel(dsl_pool_t *dp) dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } +boolean_t +dsl_scan_is_paused_scrub(const dsl_scan_t *scn) +{ + if (dsl_scan_scrubbing(scn->scn_dp) && + scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED) + return (B_TRUE); + + return (B_FALSE); +} + +static int +dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + /* can't pause a scrub when there is no in-progress scrub */ + if (!dsl_scan_scrubbing(dp)) + return (SET_ERROR(ENOENT)); + + /* can't pause a paused scrub */ + if (dsl_scan_is_paused_scrub(scn)) + return (SET_ERROR(EBUSY)); + } else if (*cmd != POOL_SCRUB_NORMAL) { + return (SET_ERROR(ENOTSUP)); + } + + return (0); +} + +static void +dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + /* can't pause a scrub when there is no in-progress scrub */ + spa->spa_scan_pass_scrub_pause = gethrestime_sec(); + scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED; + dsl_scan_sync_state(scn, tx); + } else { + ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); + if (dsl_scan_is_paused_scrub(scn)) { + /* + * We need to keep track of how much time we spend + * paused per pass so that we can adjust the scrub rate + * shown in the output of 'zpool status' + */ + spa->spa_scan_pass_scrub_spent_paused += + gethrestime_sec() - spa->spa_scan_pass_scrub_pause; + spa->spa_scan_pass_scrub_pause = 0; + scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; + dsl_scan_sync_state(scn, tx); + } + } +} + +/* + * Set scrub pause/resume state if it makes sense to do so + */ +int +dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) +{ + return (dsl_sync_task(spa_name(dp->dp_spa), + dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3, + ZFS_SPACE_CHECK_RESERVED)); +} + +boolean_t +dsl_scan_scrubbing(const dsl_pool_t *dp) +{ + dsl_scan_t *scn = dp->dp_scan; + + if (scn->scn_phys.scn_state == DSS_SCANNING && + scn->scn_phys.scn_func == POOL_SCAN_SCRUB) + return (B_TRUE); + + return (B_FALSE); +} + static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx); @@ -435,14 +523,14 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) extern int zfs_vdev_async_write_active_min_dirty_percent; static boolean_t -dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) +dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) { /* we never skip user/group accounting objects */ if (zb && (int64_t)zb->zb_object < 0) return (B_FALSE); - if (scn->scn_pausing) - return (B_TRUE); /* we're already pausing */ + if (scn->scn_suspending) + return (B_TRUE); /* we're already suspending */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ @@ -452,7 +540,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) return (B_FALSE); /* - * We pause if: + * We suspend if: * - we have scanned for the maximum time: an entire txg * timeout (default 5 sec) * or @@ -475,19 +563,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) || spa_shutting_down(scn->scn_dp->dp_spa)) { if (zb) { - dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", + dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); scn->scn_phys.scn_bookmark = *zb; } - dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", + dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n", (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); - scn->scn_pausing = B_TRUE; + scn->scn_suspending = B_TRUE; return (B_TRUE); } return (B_FALSE); @@ -625,7 +713,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, /* * If we found the block we're trying to resume from, or * we went past it to a different object, zero it out to - * indicate that it's OK to start checking for pausing + * indicate that it's OK to start checking for suspending * again. */ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || @@ -728,7 +816,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, /* * We also always visit user/group accounting * objects, and never skip them, even if we are - * pausing. This is necessary so that the space + * suspending. This is necessary so that the space * deltas from this txg get integrated. */ dsl_scan_visitdnode(scn, ds, osp->os_type, @@ -784,7 +872,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ - if (dsl_scan_check_pause(scn, zb)) + if (dsl_scan_check_suspend(scn, zb)) return; if (dsl_scan_check_resume(scn, dnp, zb)) @@ -1121,14 +1209,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " - "pausing=%u", + "suspending=%u", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, - (int)scn->scn_pausing); + (int)scn->scn_suspending); kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); - if (scn->scn_pausing) + if (scn->scn_suspending) goto out; /* @@ -1292,13 +1380,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); n++; - if (dsl_scan_check_pause(scn, NULL)) + if (dsl_scan_check_suspend(scn, NULL)) break; } - zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u", - (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, - (int)scn->scn_pausing); + zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; " + "suspending=%u", (longlong_t)n, + (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending); ASSERT(error == 0 || error == ENOENT); ASSERT(error != ENOENT || @@ -1341,7 +1429,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_ddt(scn, tx); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } @@ -1353,7 +1441,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visit_rootbp(scn, NULL, &dp->dp_meta_rootbp, tx); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); - if (scn->scn_pausing) + if (scn->scn_suspending) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { @@ -1363,22 +1451,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); } - ASSERT(!scn->scn_pausing); + ASSERT(!scn->scn_suspending); } else if (scn->scn_phys.scn_bookmark.zb_objset != ZB_DESTROYED_OBJSET) { /* - * If we were paused, continue from here. Note if the - * ds we were paused on was deleted, the zb_objset may + * If we were suspended, continue from here. Note if the + * ds we were suspended on was deleted, the zb_objset may * be -1, so we will skip this and find a new objset * below. */ dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } /* - * In case we were paused right at the end of the ds, zero the + * In case we were suspended right at the end of the ds, zero the * bookmark so we don't think that we're still trying to resume. */ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); @@ -1410,14 +1498,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visitds(scn, dsobj, tx); zap_cursor_fini(&zc); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } zap_cursor_fini(&zc); } static boolean_t -dsl_scan_free_should_pause(dsl_scan_t *scn) +dsl_scan_free_should_suspend(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; @@ -1441,7 +1529,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { - if (dsl_scan_free_should_pause(scn)) + if (dsl_scan_free_should_suspend(scn)) return (SET_ERROR(ERESTART)); } @@ -1464,7 +1552,8 @@ dsl_scan_active(dsl_scan_t *scn) return (B_FALSE); if (spa_shutting_down(spa)) return (B_FALSE); - if (scn->scn_phys.scn_state == DSS_SCANNING || + if ((scn->scn_phys.scn_state == DSS_SCANNING && + !dsl_scan_is_paused_scrub(scn)) || (scn->scn_async_destroying && !scn->scn_async_stalled)) return (B_TRUE); @@ -1519,12 +1608,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; scn->scn_visited_this_txg = 0; - scn->scn_pausing = B_FALSE; + scn->scn_suspending = B_FALSE; scn->scn_sync_start_time = gethrtime(); spa->spa_scrub_active = B_TRUE; /* - * First process the async destroys. If we pause, don't do + * First process the async destroys. If we suspend, don't do * any scrubbing or resilvering. This ensures that there are no * async destroys while we are scanning, so the scan code doesn't * have to worry about traversing it. It is also faster to free the @@ -1641,7 +1730,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; if (scn->scn_done_txg == tx->tx_txg) { - ASSERT(!scn->scn_pausing); + ASSERT(!scn->scn_suspending); /* finished with scan. */ zfs_dbgmsg("txg %llu scan complete", tx->tx_txg); dsl_scan_done(scn, B_TRUE, tx); @@ -1650,6 +1739,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; } + if (dsl_scan_is_paused_scrub(scn)) + return; + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { zfs_dbgmsg("doing scan sync txg %llu; " @@ -1684,7 +1776,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) (longlong_t)scn->scn_visited_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); - if (!scn->scn_pausing) { + if (!scn->scn_suspending) { scn->scn_done_txg = tx->tx_txg + 1; zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu", tx->tx_txg, scn->scn_done_txg); @@ -1893,11 +1985,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, return (0); } -/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */ +/* + * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver. + * Can also be called to resume a paused scrub. + */ int dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) { spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; /* * Purge all vdev caches and probe all devices. We do this here @@ -1912,6 +2008,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); + if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { + /* got scrub start cmd, resume paused scrub */ + int err = dsl_scrub_set_pause_resume(scn->scn_dp, + POOL_SCRUB_NORMAL); + if (err == 0) + return (ECANCELED); + + return (SET_ERROR(err)); + } + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 40312de..d92a1cb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -27,6 +27,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ /* @@ -6053,6 +6054,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) * SPA Scanning * ========================================================================== */ +int +spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) +{ + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + + if (dsl_scan_resilvering(spa->spa_dsl_pool)) + return (SET_ERROR(EBUSY)); + + return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd)); +} int spa_scan_stop(spa_t *spa) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c index 3b9c7e7..d507f04 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c @@ -26,6 +26,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ #include <sys/zfs_context.h> @@ -2149,6 +2150,11 @@ spa_scan_stat_init(spa_t *spa) { /* data not stored on disk */ spa->spa_scan_pass_start = gethrestime_sec(); + if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan)) + spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start; + else + spa->spa_scan_pass_scrub_pause = 0; + spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; vdev_scan_stat_init(spa->spa_root_vdev); } @@ -2179,6 +2185,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) /* data not stored on disk */ ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_exam = spa->spa_scan_pass_exam; + ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause; + ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused; return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h index ee8512c..fd950cc 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_DSL_SCAN_H @@ -70,6 +71,7 @@ typedef struct dsl_scan_phys { typedef enum dsl_scan_flags { DSF_VISIT_DS_AGAIN = 1<<0, + DSF_SCRUB_PAUSED = 1<<1, } dsl_scan_flags_t; /* @@ -82,8 +84,8 @@ typedef enum dsl_scan_flags { * * The following members of this structure direct the behavior of the scan: * - * scn_pausing - a scan that cannot be completed in a single txg or - * has exceeded its allotted time will need to pause. + * scn_suspending - a scan that cannot be completed in a single txg or + * has exceeded its allotted time will need to suspend. * When this flag is set the scanner will stop traversing * the pool and write out the current state to disk. * @@ -105,7 +107,7 @@ typedef enum dsl_scan_flags { typedef struct dsl_scan { struct dsl_pool *scn_dp; - boolean_t scn_pausing; + boolean_t scn_suspending; uint64_t scn_restart_txg; uint64_t scn_done_txg; uint64_t scn_sync_start_time; @@ -115,8 +117,6 @@ typedef struct dsl_scan { boolean_t scn_is_bptree; boolean_t scn_async_destroying; boolean_t scn_async_stalled; - - /* for debugging / information */ uint64_t scn_visited_this_txg; dsl_scan_phys_t scn_phys; @@ -127,6 +127,8 @@ void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); int dsl_scan(struct dsl_pool *, pool_scan_func_t); +boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); +int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); @@ -137,6 +139,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, struct dmu_tx *tx); boolean_t dsl_scan_active(dsl_scan_t *scn); +boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn); #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h index c66867b..71ffc5b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h @@ -25,6 +25,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_SPA_H @@ -695,6 +696,7 @@ extern void spa_l2cache_drop(spa_t *spa); /* scanning */ extern int spa_scan(spa_t *spa, pool_scan_func_t func); extern int spa_scan_stop(spa_t *spa); +extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag); /* spa syncing */ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h index f308e73..78cdaec 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h @@ -25,6 +25,7 @@ * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_SPA_IMPL_H @@ -193,6 +194,8 @@ struct spa { uint8_t spa_scrub_started; /* started since last boot */ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ uint64_t spa_scan_pass_start; /* start time per pass/reboot */ + uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */ + uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ kmutex_t spa_async_lock; /* protect async state */ kthread_t *spa_async_thread; /* thread doing async task */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index a60865d..b1d29b8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -32,6 +32,7 @@ * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 RackTop Systems. + * Copyright (c) 2017 Datto Inc. */ /* @@ -1726,6 +1727,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc) * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) + * zc_flags scrub pause/resume flag (pool_scrub_cmd_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) @@ -1736,7 +1738,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); - if (zc->zc_cookie == POOL_SCAN_NONE) + if (zc->zc_flags >= POOL_SCRUB_FLAGS_END) + return (SET_ERROR(EINVAL)); + + if (zc->zc_flags == POOL_SCRUB_PAUSE) + error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); + else if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h index 7c72c88..676e5ca 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -26,6 +26,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -697,6 +698,16 @@ typedef enum pool_scan_func { } pool_scan_func_t; /* + * Used to control scrub pause and resume. + */ +typedef enum pool_scrub_cmd { + POOL_SCRUB_NORMAL = 0, + POOL_SCRUB_PAUSE, + POOL_SCRUB_FLAGS_END +} pool_scrub_cmd_t; + + +/* * ZIO types. Needed to interpret vdev statistics below. */ typedef enum zio_type { @@ -728,6 +739,9 @@ typedef struct pool_scan_stat { /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ + uint64_t pss_pass_scrub_pause; /* pause time of a scurb pass */ + /* cumulative time scrub spent paused, needed for rate calculation */ + uint64_t pss_pass_scrub_spent_paused; } pool_scan_stat_t; typedef enum dsl_scan_state { |