summary | refs | log | tree | commit | diff | stats
path: root/cddl/contrib/opensolaris/lib
diff options
context:
space:
mode:
author: mav <mav@FreeBSD.org> 2018-04-16 03:32:41 +0000
committer: mav <mav@FreeBSD.org> 2018-04-16 03:32:41 +0000
commit: 13f49c3c8f0f20b613f308073363007c7d62ea57 (patch)
tree: 51aabb1447f1077a7af9776bc016c6fbed3290e8 /cddl/contrib/opensolaris/lib
parent: 05908e9a0336c64217262cd3df008e560d0f67e7 (diff)
download: FreeBSD-src-13f49c3c8f0f20b613f308073363007c7d62ea57.zip
FreeBSD-src-13f49c3c8f0f20b613f308073363007c7d62ea57.tar.gz
MFC r329732: MFV r329502: 7614 zfs device evacuation/removal
illumos/illumos-gate@5cabbc6b49070407fb9610cfe73d4c0e0dea3e77
https://www.illumos.org/issues/7614:
This project allows top-level vdevs to be removed from the storage pool with “zpool remove”, reducing the total amount of storage in the pool. This operation copies all allocated regions of the device to be removed onto other devices, recording the mapping from old to new location. After the removal is complete, read and free operations to the removed (now “indirect”) vdev must be remapped and performed at the new location on disk. The indirect mapping table is kept in memory whenever the pool is loaded, so there is minimal performance overhead when doing operations on the indirect vdev. The size of the in-memory mapping table will be reduced when its entries become “obsolete” because they are no longer used by any block pointers in the pool. An entry becomes obsolete when all the blocks that use it are freed. An entry can also become obsolete when all the snapshots that reference it are deleted, and the block pointers that reference it have been “remapped” in all filesystems/zvols (and clones). Whenever an indirect block is written, all the block pointers in it will be “remapped” to their new (concrete) locations if possible. This process can be accelerated by using the “zfs remap” command to proactively rewrite all indirect blocks that reference indirect (removed) vdevs. Note that when a device is removed, we do not verify the checksum of the data that is copied. This makes the process much faster, but if it were used on redundant vdevs (i.e. mirror or raidz vdevs), it would be possible to copy the wrong data, when we have the correct data on e.g. the other side of the mirror. Therefore, mirror and raidz devices can not be removed.
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Tim Chase <tim@chase2k.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Author: Prashanth Sreenivasa <pks@delphix.com>
Diffstat (limited to 'cddl/contrib/opensolaris/lib')
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h | 6
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c | 18
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c | 110
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c | 7
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c | 10
-rw-r--r-- cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h | 1
6 files changed, 132 insertions, 20 deletions
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
index 3e15dd1..e382c0e 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
@@ -131,6 +131,7 @@ typedef enum zfs_error {
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
+ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
EZFS_UNKNOWN
} zfs_error_t;
@@ -267,6 +268,8 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
const char *, nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove_cancel(zpool_handle_t *);
+extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *);
extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
splitflags_t);
@@ -825,6 +828,7 @@ extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
extern int zmount(const char *, const char *, int, char *, char *, int, char *,
int);
#endif
+extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *);
#ifdef __cplusplus
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
index 691fb04..0b51e1cc 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
@@ -3829,6 +3829,24 @@ zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
return (rv);
}
+int
+zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs)
+{
+ int err;
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot remap filesystem '%s' "), fs);
+
+ err = lzc_remap(fs);
+
+ if (err != 0) {
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
+
+ return (err);
+}
+
/*
* Creates snapshots. The keys in the snaps nvlist are the snapshots to be
* created.
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
index 612f37e..2b428ef 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
@@ -1334,6 +1334,13 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
(void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
+ case EINVAL:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid config; a pool with removing/removed "
+ "vdevs does not support adding raidz vdevs"));
+ (void) zfs_error(hdl, EZFS_BADDEV, msg);
+ break;
+
case EOVERFLOW:
/*
* This occurrs when one of the devices is below
@@ -2664,7 +2671,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
- &islog)) == 0)
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
if (avail_spare)
@@ -2773,7 +2780,8 @@ zpool_vdev_attach(zpool_handle_t *zhp,
break;
case EBUSY:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
+ "or pool has removing/removed vdevs"),
new_disk);
(void) zfs_error(hdl, EZFS_BADDEV, msg);
break;
@@ -2827,7 +2835,7 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- NULL)) == 0)
+ NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
if (avail_spare)
@@ -3116,8 +3124,7 @@ out:
}
/*
- * Remove the given device. Currently, this is supported only for hot spares
- * and level 2 cache devices.
+ * Remove the given device.
*/
int
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
@@ -3134,26 +3141,61 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- &islog)) == 0)
- return (zfs_error(hdl, EZFS_NODEVICE, msg));
- /*
- * XXX - this should just go away.
- */
- if (!avail_spare && !l2cache && !islog) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "only inactive hot spares, cache, top-level, "
- "or log devices can be removed"));
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
- }
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
if (islog && version < SPA_VERSION_HOLES) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool must be upgrade to support log removal"));
+ "pool must be upgraded to support log removal"));
return (zfs_error(hdl, EZFS_BADVERSION, msg));
}
- verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+ if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "root pool can not have removed devices, "
+ "because GRUB does not understand them"));
+ return (zfs_error(hdl, EINVAL, msg));
+ }
+
+ zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
+ return (0);
+
+ switch (errno) {
+
+ case EINVAL:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid config; all top-level vdevs must "
+ "have the same sector size and not be raidz."));
+ (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
+ break;
+
+ case EBUSY:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Pool busy; removal may already be in progress"));
+ (void) zfs_error(hdl, EZFS_BUSY, msg);
+ break;
+
+ default:
+ (void) zpool_standard_error(hdl, errno, msg);
+ }
+ return (-1);
+}
+
+int
+zpool_vdev_remove_cancel(zpool_handle_t *zhp)
+{
+ zfs_cmd_t zc = { 0 };
+ char msg[1024];
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot cancel removal"));
+
+ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+ zc.zc_cookie = 1;
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
return (0);
@@ -3161,6 +3203,36 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
return (zpool_standard_error(hdl, errno, msg));
}
+int
+zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
+ uint64_t *sizep)
+{
+ char msg[1024];
+ nvlist_t *tgt;
+ boolean_t avail_spare, l2cache, islog;
+ libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
+ path);
+
+ if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
+ &islog)) == NULL)
+ return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+ if (avail_spare || l2cache || islog) {
+ *sizep = 0;
+ return (0);
+ }
+
+ if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "indirect size not available"));
+ return (zfs_error(hdl, EINVAL, msg));
+ }
+ return (0);
+}
+
/*
* Clear the errors for the pool, or the particular device if specified.
*/
@@ -3188,7 +3260,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if (path) {
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
- &l2cache, NULL)) == 0)
+ &l2cache, NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
/*
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
index 5f5335d..6adab0b 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
@@ -240,6 +240,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "invalid diff data"));
case EZFS_POOLREADONLY:
return (dgettext(TEXT_DOMAIN, "pool is read-only"));
+ case EZFS_NO_PENDING:
+ return (dgettext(TEXT_DOMAIN, "operation is not "
+ "in progress"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -487,6 +490,10 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case EROFS:
zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
break;
+ /* There is no pending operation to cancel */
+ case ESRCH:
+ zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap);
+ break;
default:
zfs_error_aux(hdl, strerror(error));
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
index b1863be..a7c973f 100644
--- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
@@ -286,6 +286,16 @@ lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
return (0);
}
+int
+lzc_remap(const char *fsname)
+{
+ int error;
+ nvlist_t *args = fnvlist_alloc();
+ error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
+ nvlist_free(args);
+ return (error);
+}
+
/*
* Creates snapshots.
*
diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
index c61ad52..5202fd1 100644
--- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
+++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
@@ -47,6 +47,7 @@ enum lzc_dataset_type {
LZC_DATSET_TYPE_ZVOL
};
+int lzc_remap(const char *fsname);
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
int lzc_clone(const char *, const char *, nvlist_t *);
OpenPOWER on IntegriCloud