summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib
diff options
context:
space:
mode:
authormm <mm@FreeBSD.org>2012-06-11 11:35:22 +0000
committermm <mm@FreeBSD.org>2012-06-11 11:35:22 +0000
commitcc61ab2f133566dca51970d44cc49a4355039b5d (patch)
treea0efb9ad759cc8c8f8ec6f2cbabe6b3305a334e9 /sys/cddl/contrib
parent2cecdd81d885af2868172221b5068c523eade78c (diff)
downloadFreeBSD-src-cc61ab2f133566dca51970d44cc49a4355039b5d.zip
FreeBSD-src-cc61ab2f133566dca51970d44cc49a4355039b5d.tar.gz
Introduce "feature flags" for ZFS pools (bump SPA version to 5000).
Add first feature "com.delphix:async_destroy" (asynchronous destroy of ZFS datasets). Implement features support in ZFS boot code. Illumos revisions merged: 13700:2889e2596bd6 13701:1949b688d5fb 2619 asynchronous destruction of ZFS file systems 2747 SPA versioning with zfs feature flags References: https://www.illumos.org/issues/2619 https://www.illumos.org/issues/2747 Obtained from: illumos (issue #2619, #2747) MFC after: 1 month
Diffstat (limited to 'sys/cddl/contrib')
-rw-r--r--sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c498
-rw-r--r--sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c155
-rw-r--r--sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h70
-rw-r--r--sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c22
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/Makefile.files10
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c224
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c126
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c22
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c107
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c5
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c19
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c183
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c8
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c57
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c132
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c16
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c462
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c57
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bptree.h64
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h96
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h17
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h5
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h52
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h14
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c7
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c103
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c14
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c414
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c19
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c46
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h20
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h68
46 files changed, 2794 insertions, 402 deletions
diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c b/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c
new file mode 100644
index 0000000..acef4cb
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c
@@ -0,0 +1,498 @@
+
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/nvpair.h>
+#ifndef _KERNEL
+#include <sys/zfs_context.h>
+#else
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#endif
+
+/*
+ * "Force" nvlist wrapper.
+ *
+ * These functions wrap the nvlist_* functions with assertions that assume
+ * the operation is successful. This allows the caller's code to be much
+ * more readable, especially for the fnvlist_lookup_* and fnvpair_value_*
+ * functions, which can return the requested value (rather than filling in
+ * a pointer).
+ *
+ * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate
+ * with KM_SLEEP.
+ *
+ * More wrappers should be added as needed -- for example
+ * nvlist_lookup_*_array and nvpair_value_*_array.
+ */
+
+nvlist_t *
+fnvlist_alloc(void)
+{
+ nvlist_t *nvl;
+ VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
+ return (nvl);
+}
+
+void
+fnvlist_free(nvlist_t *nvl)
+{
+ nvlist_free(nvl);
+}
+
+size_t
+fnvlist_size(nvlist_t *nvl)
+{
+ size_t size;
+ VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0);
+ return (size);
+}
+
+/*
+ * Returns allocated buffer of size *sizep. Caller must free the buffer with
+ * fnvlist_pack_free().
+ */
+char *
+fnvlist_pack(nvlist_t *nvl, size_t *sizep)
+{
+ char *packed = 0;
+ VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE,
+ KM_SLEEP), ==, 0);
+ return (packed);
+}
+
+/*ARGSUSED*/
+void
+fnvlist_pack_free(char *pack, size_t size)
+{
+#ifdef _KERNEL
+ kmem_free(pack, size);
+#else
+ free(pack);
+#endif
+}
+
+nvlist_t *
+fnvlist_unpack(char *buf, size_t buflen)
+{
+ nvlist_t *rv;
+ VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0);
+ return (rv);
+}
+
+nvlist_t *
+fnvlist_dup(nvlist_t *nvl)
+{
+ nvlist_t *rv;
+ VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0);
+ return (rv);
+}
+
+void
+fnvlist_merge(nvlist_t *dst, nvlist_t *src)
+{
+ VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
+}
+
+void
+fnvlist_add_boolean(nvlist_t *nvl, const char *name)
+{
+ VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
+}
+
+void
+fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
+{
+ VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
+{
+ VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
+{
+ VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
+{
+ VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
+{
+ VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
+{
+ VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
+{
+ VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
+{
+ VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
+{
+ VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
+{
+ VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
+{
+ VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
+{
+ VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
+{
+ VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
+}
+
+void
+fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
+ boolean_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
+ uint16_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
+ uint32_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
+ uint64_t *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_string_array(nvlist_t *nvl, const char *name,
+ char * const *val, uint_t n)
+{
+ VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
+ nvlist_t **val, uint_t n)
+{
+ VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_remove(nvlist_t *nvl, const char *name)
+{
+ VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
+}
+
+void
+fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
+{
+ VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
+}
+
+nvpair_t *
+fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
+{
+ nvpair_t *rv;
+ VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+/* returns B_TRUE if the entry exists */
+boolean_t
+fnvlist_lookup_boolean(nvlist_t *nvl, const char *name)
+{
+ return (nvlist_lookup_boolean(nvl, name) == 0);
+}
+
+boolean_t
+fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
+{
+ boolean_t rv;
+ VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+uchar_t
+fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
+{
+ uchar_t rv;
+ VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+int8_t
+fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
+{
+ int8_t rv;
+ VERIFY3U(nvlist_lookup_int8(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+int16_t
+fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
+{
+ int16_t rv;
+ VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+int32_t
+fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
+{
+ int32_t rv;
+ VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+int64_t
+fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
+{
+ int64_t rv;
+ VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+uint8_t
+fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name)
+{
+ uint8_t rv;
+ VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+uint16_t
+fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
+{
+ uint16_t rv;
+ VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+uint32_t
+fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
+{
+ uint32_t rv;
+ VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+uint64_t
+fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
+{
+ uint64_t rv;
+ VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+char *
+fnvlist_lookup_string(nvlist_t *nvl, const char *name)
+{
+ char *rv;
+ VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+nvlist_t *
+fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
+{
+ nvlist_t *rv;
+ VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0);
+ return (rv);
+}
+
+boolean_t
+fnvpair_value_boolean_value(nvpair_t *nvp)
+{
+ boolean_t rv;
+ VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+uchar_t
+fnvpair_value_byte(nvpair_t *nvp)
+{
+ uchar_t rv;
+ VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+int8_t
+fnvpair_value_int8(nvpair_t *nvp)
+{
+ int8_t rv;
+ VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+int16_t
+fnvpair_value_int16(nvpair_t *nvp)
+{
+ int16_t rv;
+ VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+int32_t
+fnvpair_value_int32(nvpair_t *nvp)
+{
+ int32_t rv;
+ VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+int64_t
+fnvpair_value_int64(nvpair_t *nvp)
+{
+ int64_t rv;
+ VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+uint8_t
+fnvpair_value_uint8_t(nvpair_t *nvp)
+{
+ uint8_t rv;
+ VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+uint16_t
+fnvpair_value_uint16(nvpair_t *nvp)
+{
+ uint16_t rv;
+ VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+uint32_t
+fnvpair_value_uint32(nvpair_t *nvp)
+{
+ uint32_t rv;
+ VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+uint64_t
+fnvpair_value_uint64(nvpair_t *nvp)
+{
+ uint64_t rv;
+ VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+char *
+fnvpair_value_string(nvpair_t *nvp)
+{
+ char *rv;
+ VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0);
+ return (rv);
+}
+
+nvlist_t *
+fnvpair_value_nvlist(nvpair_t *nvp)
+{
+ nvlist_t *rv;
+ VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0);
+ return (rv);
+}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
new file mode 100644
index 0000000..6dadd99
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c
@@ -0,0 +1,155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <errno.h>
+#include <string.h>
+#endif
+#include <sys/debug.h>
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+#include "zfeature_common.h"
+
+/*
+ * Set to disable all feature checks while opening pools, allowing pools with
+ * unsupported features to be opened. Set for testing only.
+ */
+boolean_t zfeature_checks_disable = B_FALSE;
+
+zfeature_info_t spa_feature_table[SPA_FEATURES];
+
+/*
+ * Valid characters for feature guids. This list is mainly for aesthetic
+ * purposes and could be expanded in the future. There are different allowed
+ * characters in the guids reverse dns portion (before the colon) and its
+ * short name (after the colon).
+ */
+static int
+valid_char(char c, boolean_t after_colon)
+{
+ return ((c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9') ||
+ c == (after_colon ? '_' : '.'));
+}
+
+/*
+ * Every feature guid must contain exactly one colon which separates a reverse
+ * dns organization name from the feature's "short" name (e.g.
+ * "com.company:feature_name").
+ */
+boolean_t
+zfeature_is_valid_guid(const char *name)
+{
+ int i;
+ boolean_t has_colon = B_FALSE;
+
+ i = 0;
+ while (name[i] != '\0') {
+ char c = name[i++];
+ if (c == ':') {
+ if (has_colon)
+ return (B_FALSE);
+ has_colon = B_TRUE;
+ continue;
+ }
+ if (!valid_char(c, has_colon))
+ return (B_FALSE);
+ }
+
+ return (has_colon);
+}
+
+boolean_t
+zfeature_is_supported(const char *guid)
+{
+ if (zfeature_checks_disable)
+ return (B_TRUE);
+
+ return (0 == zfeature_lookup_guid(guid, NULL));
+}
+
+int
+zfeature_lookup_guid(const char *guid, zfeature_info_t **res)
+{
+ for (int i = 0; i < SPA_FEATURES; i++) {
+ zfeature_info_t *feature = &spa_feature_table[i];
+ if (strcmp(guid, feature->fi_guid) == 0) {
+ if (res != NULL)
+ *res = feature;
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+int
+zfeature_lookup_name(const char *name, zfeature_info_t **res)
+{
+ for (int i = 0; i < SPA_FEATURES; i++) {
+ zfeature_info_t *feature = &spa_feature_table[i];
+ if (strcmp(name, feature->fi_uname) == 0) {
+ if (res != NULL)
+ *res = feature;
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+static void
+zfeature_register(int fid, const char *guid, const char *name, const char *desc,
+ boolean_t readonly, boolean_t mos, zfeature_info_t **deps)
+{
+ zfeature_info_t *feature = &spa_feature_table[fid];
+ static zfeature_info_t *nodeps[] = { NULL };
+
+ ASSERT(name != NULL);
+ ASSERT(desc != NULL);
+ ASSERT(!readonly || !mos);
+ ASSERT3U(fid, <, SPA_FEATURES);
+ ASSERT(zfeature_is_valid_guid(guid));
+
+ if (deps == NULL)
+ deps = nodeps;
+
+ feature->fi_guid = guid;
+ feature->fi_uname = name;
+ feature->fi_desc = desc;
+ feature->fi_can_readonly = readonly;
+ feature->fi_mos = mos;
+ feature->fi_depends = deps;
+}
+
+void
+zpool_feature_init(void)
+{
+ zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
+ "com.delphix:async_destroy", "async_destroy",
+ "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
+}
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
new file mode 100644
index 0000000..b57eb43
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _ZFEATURE_COMMON_H
+#define _ZFEATURE_COMMON_H
+
+#include <sys/fs/zfs.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zfeature_info;
+
+typedef struct zfeature_info {
+ const char *fi_uname; /* User-facing feature name */
+ const char *fi_guid; /* On-disk feature identifier */
+ const char *fi_desc; /* Feature description */
+ boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
+ boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
+ struct zfeature_info **fi_depends; /* array; null terminated */
+} zfeature_info_t;
+
+typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
+
+#define ZFS_FEATURE_DEBUG
+
+static enum spa_feature {
+ SPA_FEATURE_ASYNC_DESTROY,
+ SPA_FEATURES
+} spa_feature_t;
+
+extern zfeature_info_t spa_feature_table[SPA_FEATURES];
+
+extern boolean_t zfeature_is_valid_guid(const char *);
+
+extern boolean_t zfeature_is_supported(const char *);
+extern int zfeature_lookup_guid(const char *, zfeature_info_t **res);
+extern int zfeature_lookup_name(const char *, zfeature_info_t **res);
+
+extern void zpool_feature_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFEATURE_COMMON_H */
diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c
index 512e067..72db879 100644
--- a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c
+++ b/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c
@@ -79,6 +79,8 @@ zpool_prop_init(void)
ZFS_TYPE_POOL, "<size>", "SIZE");
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "FREE");
+ zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
+ ZFS_TYPE_POOL, "<size>", "FREEING");
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,
@@ -166,6 +168,26 @@ zpool_prop_default_numeric(zpool_prop_t prop)
return (zpool_prop_table[prop].pd_numdefault);
}
+/*
+ * Returns true if this is a valid feature@ property.
+ */
+boolean_t
+zpool_prop_feature(const char *name)
+{
+ static const char *prefix = "feature@";
+ return (strncmp(name, prefix, strlen(prefix)) == 0);
+}
+
+/*
+ * Returns true if this is a valid unsupported@ property.
+ */
+boolean_t
+zpool_prop_unsupported(const char *name)
+{
+ static const char *prefix = "unsupported@";
+ return (strncmp(name, prefix, strlen(prefix)) == 0);
+}
+
int
zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
uint64_t *index)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
index 2ab1d7b..30d4f79 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
+++ b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
@@ -21,6 +21,7 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
#
#
# This Makefile defines all file modules for the directory uts/common
@@ -31,6 +32,7 @@ ZFS_COMMON_OBJS += \
arc.o \
bplist.o \
bpobj.o \
+ bptree.o \
dbuf.o \
ddt.o \
ddt_zap.o \
@@ -52,6 +54,7 @@ ZFS_COMMON_OBJS += \
dsl_deleg.o \
dsl_prop.o \
dsl_scan.o \
+ zfeature.o \
gzip.o \
lzjb.o \
metaslab.o \
@@ -94,11 +97,12 @@ ZFS_COMMON_OBJS += \
zrlock.o
ZFS_SHARED_OBJS += \
- zfs_namecheck.o \
- zfs_deleg.o \
- zfs_prop.o \
+ zfeature_common.o \
zfs_comutil.o \
+ zfs_deleg.o \
zfs_fletcher.o \
+ zfs_namecheck.o \
+ zfs_prop.o \
zpool_prop.o \
zprop_common.o
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index cb81392..9dd4e07 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -2794,9 +2794,11 @@ arc_read_done(zio_t *zio)
callback_list = hdr->b_acb;
ASSERT(callback_list != NULL);
if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
+ dmu_object_byteswap_t bswap =
+ DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
byteswap_uint64_array :
- dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
+ dmu_ot_byteswap[bswap].ob_func;
func(buf->b_data, hdr->b_size);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
new file mode 100644
index 0000000..8c5a7d4
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
@@ -0,0 +1,224 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/arc.h>
+#include <sys/bptree.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_pool.h>
+#include <sys/dnode.h>
+#include <sys/refcount.h>
+#include <sys/spa.h>
+
+/*
+ * A bptree is a queue of root block pointers from destroyed datasets. When a
+ * dataset is destroyed its root block pointer is put on the end of the pool's
+ * bptree queue so the dataset's blocks can be freed asynchronously by
+ * dsl_scan_sync. This allows the delete operation to finish without traversing
+ * all the dataset's blocks.
+ *
+ * Note that while bt_begin and bt_end are only ever incremented in this code
+ * they are effectively reset to 0 every time the entire bptree is freed because
+ * the bptree's object is destroyed and re-created.
+ */
+
+struct bptree_args {
+ bptree_phys_t *ba_phys; /* data in bonus buffer, dirtied if freeing */
+ boolean_t ba_free; /* true if freeing during traversal */
+
+ bptree_itor_t *ba_func; /* function to call for each blockpointer */
+ void *ba_arg; /* caller supplied argument to ba_func */
+ dmu_tx_t *ba_tx; /* caller supplied tx, NULL if not freeing */
+} bptree_args_t;
+
+uint64_t
+bptree_alloc(objset_t *os, dmu_tx_t *tx)
+{
+ uint64_t obj;
+ dmu_buf_t *db;
+ bptree_phys_t *bt;
+
+ obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
+ SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
+ sizeof (bptree_phys_t), tx);
+
+ /*
+ * Bonus buffer contents are already initialized to 0, but for
+ * readability we make it explicit.
+ */
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ dmu_buf_will_dirty(db, tx);
+ bt = db->db_data;
+ bt->bt_begin = 0;
+ bt->bt_end = 0;
+ bt->bt_bytes = 0;
+ bt->bt_comp = 0;
+ bt->bt_uncomp = 0;
+ dmu_buf_rele(db, FTAG);
+
+ return (obj);
+}
+
+int
+bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
+{
+ dmu_buf_t *db;
+ bptree_phys_t *bt;
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ bt = db->db_data;
+ ASSERT3U(bt->bt_begin, ==, bt->bt_end);
+ ASSERT3U(bt->bt_bytes, ==, 0);
+ ASSERT3U(bt->bt_comp, ==, 0);
+ ASSERT3U(bt->bt_uncomp, ==, 0);
+ dmu_buf_rele(db, FTAG);
+
+ return (dmu_object_free(os, obj, tx));
+}
+
+void
+bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
+ uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
+{
+ dmu_buf_t *db;
+ bptree_phys_t *bt;
+ bptree_entry_phys_t bte;
+
+ /*
+ * bptree objects are in the pool mos, therefore they can only be
+ * modified in syncing context. Furthermore, this is only modified
+ * by the sync thread, so no locking is necessary.
+ */
+ ASSERT(dmu_tx_is_syncing(tx));
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ bt = db->db_data;
+
+ bte.be_birth_txg = birth_txg;
+ bte.be_bp = *bp;
+ bzero(&bte.be_zb, sizeof (bte.be_zb));
+ dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
+
+ dmu_buf_will_dirty(db, tx);
+ bt->bt_end++;
+ bt->bt_bytes += bytes;
+ bt->bt_comp += comp;
+ bt->bt_uncomp += uncomp;
+ dmu_buf_rele(db, FTAG);
+}
+
+/* ARGSUSED */
+static int
+bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ int err;
+ struct bptree_args *ba = arg;
+
+ if (bp == NULL)
+ return (0);
+
+ err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
+ if (err == 0 && ba->ba_free) {
+ ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
+ ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp);
+ ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp);
+ }
+ return (err);
+}
+
+int
+bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
+ void *arg, dmu_tx_t *tx)
+{
+ int err;
+ uint64_t i;
+ dmu_buf_t *db;
+ struct bptree_args ba;
+
+ ASSERT(!free || dmu_tx_is_syncing(tx));
+
+ err = dmu_bonus_hold(os, obj, FTAG, &db);
+ if (err != 0)
+ return (err);
+
+ if (free)
+ dmu_buf_will_dirty(db, tx);
+
+ ba.ba_phys = db->db_data;
+ ba.ba_free = free;
+ ba.ba_func = func;
+ ba.ba_arg = arg;
+ ba.ba_tx = tx;
+
+ err = 0;
+ for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
+ bptree_entry_phys_t bte;
+
+ ASSERT(!free || i == ba.ba_phys->bt_begin);
+
+ err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
+ &bte, DMU_READ_NO_PREFETCH);
+ if (err != 0)
+ break;
+
+ err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
+ bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
+ bptree_visit_cb, &ba);
+ if (free) {
+ ASSERT(err == 0 || err == ERESTART);
+ if (err != 0) {
+ /* save bookmark for future resume */
+ ASSERT3U(bte.be_zb.zb_objset, ==,
+ ZB_DESTROYED_OBJSET);
+ ASSERT3U(bte.be_zb.zb_level, ==, 0);
+ dmu_write(os, obj, i * sizeof (bte),
+ sizeof (bte), &bte, tx);
+ break;
+ } else {
+ ba.ba_phys->bt_begin++;
+ (void) dmu_free_range(os, obj,
+ i * sizeof (bte), sizeof (bte), tx);
+ }
+ }
+ }
+
+ ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
+
+ /* if all blocks are free there should be no used space */
+ if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
+ ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
+ ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
+ ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
+ }
+
+ dmu_buf_rele(db, FTAG);
+
+ return (err);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index dc9f482..b75ee44 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -227,7 +228,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
boolean_t is_metadata;
DB_DNODE_ENTER(db);
- is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
+ is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
DB_DNODE_EXIT(db);
return (is_metadata);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
index 0edf62e..ef3d0f4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -1067,11 +1068,9 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
if (spa->spa_ddt_stat_object == 0) {
- spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
- DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
- &spa->spa_ddt_stat_object, tx) == 0);
+ spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
+ DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_DDT_STATS, tx);
}
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index f7d471f..2204e9d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -45,60 +46,73 @@
#endif
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
- { byteswap_uint8_array, TRUE, "unallocated" },
- { zap_byteswap, TRUE, "object directory" },
- { byteswap_uint64_array, TRUE, "object array" },
- { byteswap_uint8_array, TRUE, "packed nvlist" },
- { byteswap_uint64_array, TRUE, "packed nvlist size" },
- { byteswap_uint64_array, TRUE, "bpobj" },
- { byteswap_uint64_array, TRUE, "bpobj header" },
- { byteswap_uint64_array, TRUE, "SPA space map header" },
- { byteswap_uint64_array, TRUE, "SPA space map" },
- { byteswap_uint64_array, TRUE, "ZIL intent log" },
- { dnode_buf_byteswap, TRUE, "DMU dnode" },
- { dmu_objset_byteswap, TRUE, "DMU objset" },
- { byteswap_uint64_array, TRUE, "DSL directory" },
- { zap_byteswap, TRUE, "DSL directory child map"},
- { zap_byteswap, TRUE, "DSL dataset snap map" },
- { zap_byteswap, TRUE, "DSL props" },
- { byteswap_uint64_array, TRUE, "DSL dataset" },
- { zfs_znode_byteswap, TRUE, "ZFS znode" },
- { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" },
- { byteswap_uint8_array, FALSE, "ZFS plain file" },
- { zap_byteswap, TRUE, "ZFS directory" },
- { zap_byteswap, TRUE, "ZFS master node" },
- { zap_byteswap, TRUE, "ZFS delete queue" },
- { byteswap_uint8_array, FALSE, "zvol object" },
- { zap_byteswap, TRUE, "zvol prop" },
- { byteswap_uint8_array, FALSE, "other uint8[]" },
- { byteswap_uint64_array, FALSE, "other uint64[]" },
- { zap_byteswap, TRUE, "other ZAP" },
- { zap_byteswap, TRUE, "persistent error log" },
- { byteswap_uint8_array, TRUE, "SPA history" },
- { byteswap_uint64_array, TRUE, "SPA history offsets" },
- { zap_byteswap, TRUE, "Pool properties" },
- { zap_byteswap, TRUE, "DSL permissions" },
- { zfs_acl_byteswap, TRUE, "ZFS ACL" },
- { byteswap_uint8_array, TRUE, "ZFS SYSACL" },
- { byteswap_uint8_array, TRUE, "FUID table" },
- { byteswap_uint64_array, TRUE, "FUID table size" },
- { zap_byteswap, TRUE, "DSL dataset next clones"},
- { zap_byteswap, TRUE, "scan work queue" },
- { zap_byteswap, TRUE, "ZFS user/group used" },
- { zap_byteswap, TRUE, "ZFS user/group quota" },
- { zap_byteswap, TRUE, "snapshot refcount tags"},
- { zap_byteswap, TRUE, "DDT ZAP algorithm" },
- { zap_byteswap, TRUE, "DDT statistics" },
- { byteswap_uint8_array, TRUE, "System attributes" },
- { zap_byteswap, TRUE, "SA master node" },
- { zap_byteswap, TRUE, "SA attr registration" },
- { zap_byteswap, TRUE, "SA attr layouts" },
- { zap_byteswap, TRUE, "scan translations" },
- { byteswap_uint8_array, FALSE, "deduplicated block" },
- { zap_byteswap, TRUE, "DSL deadlist map" },
- { byteswap_uint64_array, TRUE, "DSL deadlist map hdr" },
- { zap_byteswap, TRUE, "DSL dir clones" },
- { byteswap_uint64_array, TRUE, "bpobj subobj" },
+ { DMU_BSWAP_UINT8, TRUE, "unallocated" },
+ { DMU_BSWAP_ZAP, TRUE, "object directory" },
+ { DMU_BSWAP_UINT64, TRUE, "object array" },
+ { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
+ { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
+ { DMU_BSWAP_UINT64, TRUE, "bpobj" },
+ { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
+ { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
+ { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
+ { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
+ { DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
+ { DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
+ { DMU_BSWAP_UINT64, TRUE, "DSL directory" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
+ { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL props" },
+ { DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
+ { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
+ { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
+ { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
+ { DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
+ { DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
+ { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
+ { DMU_BSWAP_UINT8, FALSE, "zvol object" },
+ { DMU_BSWAP_ZAP, TRUE, "zvol prop" },
+ { DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
+ { DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
+ { DMU_BSWAP_ZAP, TRUE, "other ZAP" },
+ { DMU_BSWAP_ZAP, TRUE, "persistent error log" },
+ { DMU_BSWAP_UINT8, TRUE, "SPA history" },
+ { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
+ { DMU_BSWAP_ZAP, TRUE, "Pool properties" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
+ { DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
+ { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
+ { DMU_BSWAP_UINT8, TRUE, "FUID table" },
+ { DMU_BSWAP_UINT64, TRUE, "FUID table size" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
+ { DMU_BSWAP_ZAP, TRUE, "scan work queue" },
+ { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
+ { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
+ { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
+ { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
+ { DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
+ { DMU_BSWAP_UINT8, TRUE, "System attributes" },
+ { DMU_BSWAP_ZAP, TRUE, "SA master node" },
+ { DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
+ { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
+ { DMU_BSWAP_ZAP, TRUE, "scan translations" },
+ { DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
+ { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
+ { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
+ { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
+};
+
+const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
+ { byteswap_uint8_array, "uint8" },
+ { byteswap_uint16_array, "uint16" },
+ { byteswap_uint32_array, "uint32" },
+ { byteswap_uint64_array, "uint64" },
+ { zap_byteswap, "zap" },
+ { dnode_buf_byteswap, "dnode" },
+ { dmu_objset_byteswap, "objset" },
+ { zfs_znode_byteswap, "znode" },
+ { zfs_oldacl_byteswap, "oldacl" },
+ { zfs_acl_byteswap, "acl" }
};
int
@@ -175,7 +189,7 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
- if (type > DMU_OT_NUMTYPES) {
+ if (!DMU_OT_IS_VALID(type)) {
error = EINVAL;
} else if (dn->dn_bonus != db) {
error = EINVAL;
@@ -1513,7 +1527,7 @@ void
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
{
dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
- boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
+ boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
(wp & WP_SPILL));
enum zio_checksum checksum = os->os_checksum;
enum zio_compress compress = os->os_compress;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
index baed037..81fd9f2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
@@ -20,11 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
- */
-/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
@@ -1117,8 +1114,8 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
void *data = NULL;
if (drro->drr_type == DMU_OT_NONE ||
- drro->drr_type >= DMU_OT_NUMTYPES ||
- drro->drr_bonustype >= DMU_OT_NUMTYPES ||
+ !DMU_OT_IS_VALID(drro->drr_type) ||
+ !DMU_OT_IS_VALID(drro->drr_bonustype) ||
drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
@@ -1183,7 +1180,9 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
bcopy(data, db->db_data, drro->drr_bonuslen);
if (ra->byteswap) {
- dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
+ dmu_object_byteswap_t byteswap =
+ DMU_OT_BYTESWAP(drro->drr_bonustype);
+ dmu_ot_byteswap[byteswap].ob_func(db->db_data,
drro->drr_bonuslen);
}
dmu_buf_rele(db, FTAG);
@@ -1226,7 +1225,7 @@ restore_write(struct restorearg *ra, objset_t *os,
int err;
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
- drrw->drr_type >= DMU_OT_NUMTYPES)
+ !DMU_OT_IS_VALID(drrw->drr_type))
return (EINVAL);
data = restore_read(ra, drrw->drr_length);
@@ -1245,8 +1244,11 @@ restore_write(struct restorearg *ra, objset_t *os,
dmu_tx_abort(tx);
return (err);
}
- if (ra->byteswap)
- dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
+ if (ra->byteswap) {
+ dmu_object_byteswap_t byteswap =
+ DMU_OT_BYTESWAP(drrw->drr_type);
+ dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+ }
dmu_write(os, drrw->drr_object,
drrw->drr_offset, drrw->drr_length, data, tx);
dmu_tx_commit(tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 023f90e..bfe9e65 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -53,6 +54,7 @@ typedef struct traverse_data {
uint64_t td_objset;
blkptr_t *td_rootbp;
uint64_t td_min_txg;
+ zbookmark_t *td_resume;
int td_flags;
prefetch_data_t *td_pfd;
blkptr_cb_t *td_func;
@@ -128,6 +130,54 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
zil_free(zilog);
}
+typedef enum resume_skip {
+ RESUME_SKIP_ALL,
+ RESUME_SKIP_NONE,
+ RESUME_SKIP_CHILDREN
+} resume_skip_t;
+
+/*
+ * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
+ * the block indicated by zb does not need to be visited at all. Returns
+ * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
+ * resume point. This indicates that this block should be visited but not its
+ * children (since they must have been visited in a previous traversal).
+ * Otherwise returns RESUME_SKIP_NONE.
+ */
+static resume_skip_t
+resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
+ const zbookmark_t *zb)
+{
+ if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
+ /*
+ * If we already visited this bp & everything below,
+ * don't bother doing it again.
+ */
+ if (zbookmark_is_before(dnp, zb, td->td_resume))
+ return (RESUME_SKIP_ALL);
+
+ /*
+ * If we found the block we're trying to resume from, zero
+ * the bookmark out to indicate that we have resumed.
+ */
+ ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
+ if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
+ bzero(td->td_resume, sizeof (*zb));
+ if (td->td_flags & TRAVERSE_POST)
+ return (RESUME_SKIP_CHILDREN);
+ }
+ }
+ return (RESUME_SKIP_NONE);
+}
+
+static void
+traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
+{
+ ASSERT(td->td_resume != NULL);
+ ASSERT3U(zb->zb_level, ==, 0);
+ bcopy(zb, td->td_resume, sizeof (*td->td_resume));
+}
+
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
@@ -137,8 +187,20 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *buf = NULL;
prefetch_data_t *pd = td->td_pfd;
boolean_t hard = td->td_flags & TRAVERSE_HARD;
+ boolean_t pause = B_FALSE;
+
+ switch (resume_skip_check(td, dnp, zb)) {
+ case RESUME_SKIP_ALL:
+ return (0);
+ case RESUME_SKIP_CHILDREN:
+ goto post;
+ case RESUME_SKIP_NONE:
+ break;
+ default:
+ ASSERT(0);
+ }
- if (bp->blk_birth == 0) {
+ if (BP_IS_HOLE(bp)) {
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
td->td_arg);
return (err);
@@ -164,8 +226,10 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
td->td_arg);
if (err == TRAVERSE_VISIT_NO_CHILDREN)
return (0);
- if (err)
- return (err);
+ if (err == ERESTART)
+ pause = B_TRUE; /* handle pausing at a common point */
+ if (err != 0)
+ goto post;
}
if (BP_GET_LEVEL(bp) > 0) {
@@ -253,9 +317,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
+post:
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
td->td_arg);
+ if (err == ERESTART)
+ pause = B_TRUE;
+ }
+
+ if (pause && td->td_resume != NULL) {
+ ASSERT3U(err, ==, ERESTART);
+ ASSERT(!hard);
+ traverse_pause(td, zb);
}
return (err != 0 ? err : lasterr);
@@ -353,18 +426,23 @@ traverse_prefetch_thread(void *arg)
* in syncing context).
*/
static int
-traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
- uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
+traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
+ uint64_t txg_start, zbookmark_t *resume, int flags,
+ blkptr_cb_t func, void *arg)
{
traverse_data_t td;
prefetch_data_t pd = { 0 };
zbookmark_t czb;
int err;
+ ASSERT(ds == NULL || objset == ds->ds_object);
+ ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
+
td.td_spa = spa;
- td.td_objset = ds ? ds->ds_object : 0;
+ td.td_objset = objset;
td.td_rootbp = rootbp;
td.td_min_txg = txg_start;
+ td.td_resume = resume;
td.td_func = func;
td.td_arg = arg;
td.td_pfd = &pd;
@@ -416,8 +494,17 @@ int
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
blkptr_cb_t func, void *arg)
{
- return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
- &ds->ds_phys->ds_bp, txg_start, flags, func, arg));
+ return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
+ &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
+}
+
+int
+traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
+ uint64_t txg_start, zbookmark_t *resume, int flags,
+ blkptr_cb_t func, void *arg)
+{
+ return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
+ blkptr, txg_start, resume, flags, func, arg));
}
/*
@@ -434,8 +521,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
boolean_t hard = (flags & TRAVERSE_HARD);
/* visit the MOS */
- err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
- txg_start, flags, func, arg);
+ err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
+ txg_start, NULL, flags, func, arg);
if (err)
return (err);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
index b4579e2..3dd5f2c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
@@ -20,9 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -676,7 +675,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
return;
}
- ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
+ ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
if (dn->dn_maxblkid == 0 && !add) {
blkptr_t *bp;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
index ca2b69a..f098e11 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -196,7 +197,7 @@ dnode_verify(dnode_t *dn)
ASSERT(dn->dn_objset);
ASSERT(dn->dn_handle->dnh_dnode == dn);
- ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
return;
@@ -215,7 +216,7 @@ dnode_verify(dnode_t *dn)
ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
}
ASSERT3U(dn->dn_nlevels, <=, 30);
- ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(dn->dn_type));
ASSERT3U(dn->dn_nblkptr, >=, 1);
ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
@@ -281,8 +282,10 @@ dnode_byteswap(dnode_phys_t *dnp)
*/
int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
size_t len = DN_MAX_BONUSLEN - off;
- ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
- dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
+ ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
+ dmu_object_byteswap_t byteswap =
+ DMU_OT_BYTESWAP(dnp->dn_bonustype);
+ dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
}
/* Swap SPILL block if we have one */
@@ -410,7 +413,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dmu_zfetch_init(&dn->dn_zfetch, dn);
- ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock);
list_insert_head(&os->os_dnodes, dn);
@@ -499,11 +502,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
ASSERT(ot != DMU_OT_NONE);
- ASSERT3U(ot, <, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
- ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
ASSERT(dn->dn_type == DMU_OT_NONE);
ASSERT3U(dn->dn_maxblkid, ==, 0);
@@ -571,7 +574,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0));
- ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
/* clean up any unreferenced dbufs */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
index 32afe7d..fcabc74 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
@@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -597,7 +599,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
if (dn->dn_next_bonustype[txgoff]) {
- ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
dn->dn_next_bonustype[txgoff] = 0;
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
index 648f2f3..c445278 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
@@ -38,6 +38,7 @@
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
+#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
@@ -103,7 +104,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
return;
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
- ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
+ ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
@@ -120,7 +121,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
- ds->ds_phys->ds_used_bytes += used;
+ ds->ds_phys->ds_referenced_bytes += used;
ds->ds_phys->ds_compressed_bytes += compressed;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
@@ -214,8 +215,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
}
}
mutex_enter(&ds->ds_lock);
- ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
- ds->ds_phys->ds_used_bytes -= used;
+ ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
+ ds->ds_phys->ds_referenced_bytes -= used;
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
ds->ds_phys->ds_compressed_bytes -= compressed;
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
@@ -827,8 +828,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
origin->ds_phys->ds_creation_txg;
- dsphys->ds_used_bytes =
- origin->ds_phys->ds_used_bytes;
+ dsphys->ds_referenced_bytes =
+ origin->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes =
origin->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
@@ -981,7 +982,6 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
dsl_dataset_t *ds;
- int err;
err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
if (err == 0) {
@@ -1130,19 +1130,23 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
goto out;
/*
- * remove the objects in open context, so that we won't
- * have too much to do in syncing context.
+ * If async destruction is not enabled try to remove all objects
+ * while in the open context so that there is less work to do in
+ * the syncing context.
*/
- for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
- ds->ds_phys->ds_prev_snap_txg)) {
- /*
- * Ignore errors, if there is not enough disk space
- * we will deal with it in dsl_dataset_destroy_sync().
- */
- (void) dmu_free_object(os, obj);
+ if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
+ ds->ds_phys->ds_prev_snap_txg)) {
+ /*
+ * Ignore errors, if there is not enough disk space
+ * we will deal with it in dsl_dataset_destroy_sync().
+ */
+ (void) dmu_free_object(os, obj);
+ }
+ if (err != ESRCH)
+ goto out;
}
- if (err != ESRCH)
- goto out;
/*
* Only the ZIL knows how to free log blocks.
@@ -1288,7 +1292,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT(!dsl_dataset_is_snapshot(ds));
if (ds->ds_phys->ds_prev_snap_obj != 0)
- mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
+ mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
else
mrs_used = 0;
@@ -1296,7 +1300,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT3U(dlused, <=, mrs_used);
ds->ds_phys->ds_unique_bytes =
- ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
+ ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
SPA_VERSION_UNIQUE_ACCURATE)
@@ -1655,6 +1659,30 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
ds_next->ds_phys->ds_deadlist_obj);
}
+static int
+old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ int err;
+ struct killarg ka;
+
+ /*
+ * Free everything that we point to (that's born after
+ * the previous snapshot, if we are a clone)
+ *
+ * NB: this should be very quick, because we already
+ * freed all the objects in open context.
+ */
+ ka.ds = ds;
+ ka.tx = tx;
+ err = traverse_dataset(ds,
+ ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
+ kill_blkptr, &ka);
+ ASSERT3U(err, ==, 0);
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
+
+ return (err);
+}
+
void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
@@ -1801,7 +1829,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
- dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
@@ -1877,32 +1904,54 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
}
dsl_dataset_rele(ds_next, FTAG);
} else {
+ zfeature_info_t *async_destroy =
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+
/*
* There's no next snapshot, so this is a head dataset.
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
- struct killarg ka;
-
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
ds->ds_phys->ds_deadlist_obj = 0;
- /*
- * Free everything that we point to (that's born after
- * the previous snapshot, if we are a clone)
- *
- * NB: this should be very quick, because we already
- * freed all the objects in open context.
- */
- ka.ds = ds;
- ka.tx = tx;
- err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
- TRAVERSE_POST, kill_blkptr, &ka);
- ASSERT3U(err, ==, 0);
- ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
- ds->ds_phys->ds_unique_bytes == 0);
+ if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
+ err = old_synchronous_dataset_destroy(ds, tx);
+ } else {
+ /*
+ * Move the bptree into the pool's list of trees to
+ * clean up and update space accounting information.
+ */
+ uint64_t used, comp, uncomp;
+
+ ASSERT(err == 0 || err == EBUSY);
+ if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+ spa_feature_incr(dp->dp_spa, async_destroy, tx);
+ dp->dp_bptree_obj = bptree_alloc(
+ dp->dp_meta_objset, tx);
+ VERIFY(zap_add(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+ &dp->dp_bptree_obj, tx) == 0);
+ }
+
+ used = ds->ds_dir->dd_phys->dd_used_bytes;
+ comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
+ uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
+
+ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+ ds->ds_phys->ds_unique_bytes == used);
+
+ bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
+ &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
+ used, comp, uncomp, tx);
+ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+ -used, -comp, -uncomp, tx);
+ dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+ used, comp, uncomp, tx);
+ }
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
@@ -2095,7 +2144,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsphys->ds_creation_time = gethrestime_sec();
dsphys->ds_creation_txg = crtxg;
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
- dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
+ dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
dsphys->ds_flags = ds->ds_phys->ds_flags;
@@ -2219,10 +2268,22 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
char buf[ZFS_MAXNAMELEN];
+ /*
+ * Even though we hold the dp_config_rwlock, the dataset
+ * may fail to open, returning ENOENT. If there is a
+ * thread concurrently attempting to destroy this
+ * dataset, it will have the ds_rwlock held for
+ * RW_WRITER. Our call to dsl_dataset_hold_obj() ->
+ * dsl_dataset_hold_ref() will fail its
+ * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
+ * dp_config_rwlock, and wait for the destroy progress
+ * and signal ds_exclusive_cv. If the destroy was
+ * successful, we will see that
+ * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
+ */
if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone) != 0) {
- goto fail;
- }
+ za.za_first_integer, FTAG, &clone) != 0)
+ continue;
dsl_dir_name(clone->ds_dir, buf);
VERIFY(nvlist_add_boolean(val, buf) == 0);
dsl_dataset_rele(clone, FTAG);
@@ -2345,7 +2406,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
{
- *refdbytesp = ds->ds_phys->ds_used_bytes;
+ *refdbytesp = ds->ds_phys->ds_referenced_bytes;
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
@@ -2688,7 +2749,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
- pa->used = origin_ds->ds_phys->ds_used_bytes;
+ pa->used = origin_ds->ds_phys->ds_referenced_bytes;
pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
for (snap = list_head(&pa->shared_snaps); snap;
@@ -2722,7 +2783,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* so we need to subtract out the clone origin's used space.
*/
if (pa->origin_origin) {
- pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
+ pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
@@ -3238,8 +3299,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deadlist_space(&csa->ohds->ds_deadlist,
&odl_used, &odl_comp, &odl_uncomp);
- dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
- (csa->ohds->ds_phys->ds_used_bytes + odl_used);
+ dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
+ (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
(csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
@@ -3268,8 +3329,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
/* swap ds_*_bytes */
- SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
- csa->cds->ds_phys->ds_used_bytes);
+ SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
+ csa->cds->ds_phys->ds_referenced_bytes);
SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
csa->cds->ds_phys->ds_compressed_bytes);
SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
@@ -3398,8 +3459,9 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
- if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
- if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
+ if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
+ if (inflight > 0 ||
+ ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
error = ERESTART;
else
error = EDQUOT;
@@ -3426,7 +3488,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (psa->psa_effective_value == 0)
return (0);
- if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
+ if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
psa->psa_effective_value < ds->ds_reserved)
return (ENOSPC);
@@ -4180,8 +4242,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_pool_t *dp = new->ds_dir->dd_pool;
*usedp = 0;
- *usedp += new->ds_phys->ds_used_bytes;
- *usedp -= oldsnap->ds_phys->ds_used_bytes;
+ *usedp += new->ds_phys->ds_referenced_bytes;
+ *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
*compp = 0;
*compp += new->ds_phys->ds_compressed_bytes;
@@ -4197,9 +4259,13 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_dataset_t *snap;
uint64_t used, comp, uncomp;
- err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
- if (err != 0)
- break;
+ if (snapobj == new->ds_object) {
+ snap = new;
+ } else {
+ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
+ if (err != 0)
+ break;
+ }
if (snap->ds_phys->ds_prev_snap_txg ==
oldsnap->ds_phys->ds_creation_txg) {
@@ -4228,7 +4294,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
* was not a snapshot of/before new.
*/
snapobj = snap->ds_phys->ds_prev_snap_obj;
- dsl_dataset_rele(snap, FTAG);
+ if (snap != new)
+ dsl_dataset_rele(snap, FTAG);
if (snapobj == 0) {
err = EINVAL;
break;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
index 0b5fa0b..8f9d2c5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -171,10 +171,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
- jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
- DMU_OT_NONE, 0, tx);
- VERIFY(zap_update(mos, zapobj,
- whokey, 8, 1, &jumpobj, tx) == 0);
+ jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
+ zapobj, whokey, tx);
}
while (permpair = nvlist_next_nvpair(perms, permpair)) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
index 7f7a3b9..d698e59 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_pool.h>
@@ -40,6 +40,8 @@
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
+#include <sys/bptree.h>
+#include <sys/zfeature.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@@ -125,20 +127,32 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
}
int
-dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
+dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
{
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
+
+ err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+ &dp->dp_meta_objset);
+ if (err != 0)
+ dsl_pool_close(dp);
+ else
+ *dpp = dp;
+
+ return (err);
+}
+
+int
+dsl_pool_open(dsl_pool_t *dp)
+{
+ int err;
dsl_dir_t *dd;
dsl_dataset_t *ds;
uint64_t obj;
- rw_enter(&dp->dp_config_rwlock, RW_WRITER);
- err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
- &dp->dp_meta_objset);
- if (err)
- goto out;
+ ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
+ rw_enter(&dp->dp_config_rwlock, RW_WRITER);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
@@ -154,7 +168,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
if (err)
goto out;
- if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
if (err)
goto out;
@@ -171,7 +185,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
goto out;
}
- if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
&dp->dp_free_dir);
if (err)
@@ -185,6 +199,15 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
dp->dp_meta_objset, obj));
}
+ if (spa_feature_is_active(dp->dp_spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+ &dp->dp_bptree_obj);
+ if (err != 0)
+ goto out;
+ }
+
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);
@@ -193,15 +216,10 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
if (err)
goto out;
- err = dsl_scan_init(dp, txg);
+ err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
- if (err)
- dsl_pool_close(dp);
- else
- *dpp = dp;
-
return (err);
}
@@ -495,7 +513,7 @@ int
dsl_pool_sync_context(dsl_pool_t *dp)
{
return (curthread == dp->dp_tx.tx_sync_thread ||
- spa_get_dsl(dp->dp_spa) == NULL);
+ spa_is_initializing(dp->dp_spa));
}
uint64_t
@@ -813,11 +831,8 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dp->dp_tmp_userrefs_obj == 0);
ASSERT(dmu_tx_is_syncing(tx));
- dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
- DMU_OT_NONE, 0, tx);
-
- VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
- sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
+ dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
}
static int
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index 475b494..164a73a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_scan.h>
@@ -44,6 +45,7 @@
#include <sys/ddt.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
+#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif
@@ -381,55 +383,6 @@ dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
priority, zio_flags, arc_flags, zb));
}
-static boolean_t
-bookmark_is_zero(const zbookmark_t *zb)
-{
- return (zb->zb_objset == 0 && zb->zb_object == 0 &&
- zb->zb_level == 0 && zb->zb_blkid == 0);
-}
-
-/* dnp is the dnode for zb1->zb_object */
-static boolean_t
-bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
- const zbookmark_t *zb2)
-{
- uint64_t zb1nextL0, zb2thisobj;
-
- ASSERT(zb1->zb_objset == zb2->zb_objset);
- ASSERT(zb2->zb_level == 0);
-
- /*
- * A bookmark in the deadlist is considered to be after
- * everything else.
- */
- if (zb2->zb_object == DMU_DEADLIST_OBJECT)
- return (B_TRUE);
-
- /* The objset_phys_t isn't before anything. */
- if (dnp == NULL)
- return (B_FALSE);
-
- zb1nextL0 = (zb1->zb_blkid + 1) <<
- ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
-
- zb2thisobj = zb2->zb_object ? zb2->zb_object :
- zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
-
- if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t nextobj = zb1nextL0 *
- (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
- return (nextobj <= zb2thisobj);
- }
-
- if (zb1->zb_object < zb2thisobj)
- return (B_TRUE);
- if (zb1->zb_object > zb2thisobj)
- return (B_FALSE);
- if (zb2->zb_object == DMU_META_DNODE_OBJECT)
- return (B_FALSE);
- return (zb1nextL0 <= zb2->zb_blkid);
-}
-
static uint64_t
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
{
@@ -461,7 +414,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
if (scn->scn_pausing)
return (B_TRUE); /* we're already pausing */
- if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
+ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
/* We only know how to resume from level-0 blocks. */
@@ -616,13 +569,13 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* We never skip over user/group accounting objects (obj<0)
*/
- if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
+ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) &&
(int64_t)zb->zb_object >= 0) {
/*
* If we already visited this bp & everything below (in
* a prior txg sync), don't bother doing it again.
*/
- if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+ if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
return (B_TRUE);
/*
@@ -815,22 +768,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return;
- if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
- /*
- * For non-user-accounting blocks, we need to read the
- * new bp (from a deleted snapshot, found in
- * check_existing_xlation). If we used the old bp,
- * pointers inside this block from before we resumed
- * would be untranslated.
- *
- * For user-accounting blocks, we need to read the old
- * bp, because we will apply the entire space delta to
- * it (original untranslated -> translations from
- * deleted snap -> now).
- */
- bp_toread = *bp;
- }
-
if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
&buf) != 0)
return;
@@ -1395,19 +1332,28 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
zap_cursor_fini(&zc);
}
-static int
-dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+static boolean_t
+dsl_scan_free_should_pause(dsl_scan_t *scn)
{
- dsl_scan_t *scn = arg;
uint64_t elapsed_nanosecs;
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
-
- if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+ return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
txg_sync_waiting(scn->scn_dp)) ||
- spa_shutting_down(scn->scn_dp->dp_spa))
- return (ERESTART);
+ spa_shutting_down(scn->scn_dp->dp_spa));
+}
+
+static int
+dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ dsl_scan_t *scn = arg;
+
+ if (!scn->scn_is_bptree ||
+ (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
+ if (dsl_scan_free_should_pause(scn))
+ return (ERESTART);
+ }
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
dmu_tx_get_txg(tx), bp, 0));
@@ -1432,6 +1378,10 @@ dsl_scan_active(dsl_scan_t *scn)
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (B_TRUE);
+ if (spa_feature_is_active(spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ return (B_TRUE);
+ }
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
&used, &comp, &uncomp);
@@ -1478,14 +1428,40 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* traversing it.
*/
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
+ scn->scn_is_bptree = B_FALSE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
err = bpobj_iterate(&dp->dp_free_bpobj,
- dsl_scan_free_cb, scn, tx);
+ dsl_scan_free_block_cb, scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
+
+ if (err == 0 && spa_feature_is_active(spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ scn->scn_is_bptree = B_TRUE;
+ scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
+ NULL, ZIO_FLAG_MUSTSUCCEED);
+ err = bptree_iterate(dp->dp_meta_objset,
+ dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
+ scn, tx);
+ VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
+ if (err != 0)
+ return;
+
+ /* disable async destroy feature */
+ spa_feature_decr(spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
+ ASSERT(!spa_feature_is_active(spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
+ VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, tx));
+ VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
+ dp->dp_bptree_obj, tx));
+ dp->dp_bptree_obj = 0;
+ }
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
- "free_bpobj txg %llu",
+ "free_bpobj/bptree txg %llu",
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)
(gethrtime() - scn->scn_sync_start_time) / MICROSEC,
@@ -1600,6 +1576,8 @@ count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
+ if (t & DMU_OT_NEWTYPE)
+ t = DMU_OT_OTHER;
zfs_blkstat_t *zb = &zab->zab_type[l][t];
int equal;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
index 69374fb..173198d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
@@ -18,9 +18,11 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 iXsystems, Inc
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -427,10 +429,9 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
char attr_name[8];
if (sa->sa_layout_attr_obj == 0) {
- sa->sa_layout_attr_obj = zap_create(os,
- DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
- &sa->sa_layout_attr_obj, tx) == 0);
+ sa->sa_layout_attr_obj = zap_create_link(os,
+ DMU_OT_SA_ATTR_LAYOUTS,
+ sa->sa_master_obj, SA_LAYOUTS, tx);
}
(void) snprintf(attr_name, sizeof (attr_name),
@@ -1552,10 +1553,9 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
}
if (sa->sa_reg_attr_obj == 0) {
- sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
- DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
- VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj,
- SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
+ sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
+ DMU_OT_SA_ATTR_REGISTRATION,
+ sa->sa_master_obj, SA_REGISTRY, tx);
}
for (i = 0; i != sa->sa_num_attrs; i++) {
if (sa->sa_attr_table[i].sa_registered)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 2060aa8..c796533 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -60,6 +60,7 @@
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
+#include <sys/zfeature.h>
#include <sys/zvol.h>
#ifdef _KERNEL
@@ -117,6 +118,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
+static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
@@ -176,6 +178,7 @@ static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
vdev_t *rvd = spa->spa_root_vdev;
+ dsl_pool_t *pool = spa->spa_dsl_pool;
uint64_t size;
uint64_t alloc;
uint64_t space;
@@ -222,6 +225,22 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
}
+ if (pool != NULL) {
+ dsl_dir_t *freedir = pool->dp_free_dir;
+
+ /*
+ * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
+ * when opening pools before this version freedir will be NULL.
+ */
+ if (freedir != NULL) {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
+ freedir->dd_phys->dd_used_bytes, src);
+ } else {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
+ NULL, 0, src);
+ }
+ }
+
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
if (spa->spa_comment != NULL) {
@@ -361,25 +380,55 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
nvpair_t *elem;
int error = 0, reset_bootfs = 0;
uint64_t objnum;
+ boolean_t has_feature = B_FALSE;
elem = NULL;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
- zpool_prop_t prop;
- char *propname, *strval;
uint64_t intval;
- objset_t *os;
- char *slash, *check;
+ char *strval, *slash, *check, *fname;
+ const char *propname = nvpair_name(elem);
+ zpool_prop_t prop = zpool_name_to_prop(propname);
+
+ switch (prop) {
+ case ZPROP_INVAL:
+ if (!zpool_prop_feature(propname)) {
+ error = EINVAL;
+ break;
+ }
- propname = nvpair_name(elem);
+ /*
+ * Sanitize the input.
+ */
+ if (nvpair_type(elem) != DATA_TYPE_UINT64) {
+ error = EINVAL;
+ break;
+ }
- if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
- return (EINVAL);
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (intval != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ fname = strchr(propname, '@') + 1;
+ if (zfeature_lookup_name(fname, NULL) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ has_feature = B_TRUE;
+ break;
- switch (prop) {
case ZPOOL_PROP_VERSION:
error = nvpair_value_uint64(elem, &intval);
if (!error &&
- (intval < spa_version(spa) || intval > SPA_VERSION))
+ (intval < spa_version(spa) ||
+ intval > SPA_VERSION_BEFORE_FEATURES ||
+ has_feature))
error = EINVAL;
break;
@@ -416,6 +465,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = nvpair_value_string(elem, &strval);
if (!error) {
+ objset_t *os;
uint64_t compress;
if (strval == NULL || strval[0] == '\0') {
@@ -565,33 +615,58 @@ int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
int error;
- nvpair_t *elem;
+ nvpair_t *elem = NULL;
boolean_t need_sync = B_FALSE;
- zpool_prop_t prop;
if ((error = spa_prop_validate(spa, nvp)) != 0)
return (error);
- elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
- if ((prop = zpool_name_to_prop(
- nvpair_name(elem))) == ZPROP_INVAL)
- return (EINVAL);
+ zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
if (prop == ZPOOL_PROP_CACHEFILE ||
prop == ZPOOL_PROP_ALTROOT ||
prop == ZPOOL_PROP_READONLY)
continue;
+ if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
+ uint64_t ver;
+
+ if (prop == ZPOOL_PROP_VERSION) {
+ VERIFY(nvpair_value_uint64(elem, &ver) == 0);
+ } else {
+ ASSERT(zpool_prop_feature(nvpair_name(elem)));
+ ver = SPA_VERSION_FEATURES;
+ need_sync = B_TRUE;
+ }
+
+ /* Save time if the version is already set. */
+ if (ver == spa_version(spa))
+ continue;
+
+ /*
+ * In addition to the pool directory object, we might
+ * create the pool properties object, the features for
+ * read object, the features for write object, or the
+ * feature descriptions object.
+ */
+ error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
+ spa_sync_version, spa, &ver, 6);
+ if (error)
+ return (error);
+ continue;
+ }
+
need_sync = B_TRUE;
break;
}
- if (need_sync)
+ if (need_sync) {
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
- spa, nvp, 3));
- else
- return (0);
+ spa, nvp, 6));
+ }
+
+ return (0);
}
/*
@@ -1630,7 +1705,7 @@ spa_load_verify_done(zio_t *zio)
int error = zio->io_error;
if (error) {
- if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
+ if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
type != DMU_OT_INTENT_LOG)
atomic_add_64(&sle->sle_meta_count, 1);
else
@@ -1860,6 +1935,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
KM_SLEEP) == 0);
}
+ nvlist_free(spa->spa_load_info);
+ spa->spa_load_info = fnvlist_alloc();
+
gethrestime(&spa->spa_loaded_ts);
error = spa_load_impl(spa, pool_guid, config, state, type,
mosconfig, &ereport);
@@ -1892,12 +1970,14 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
{
int error = 0;
nvlist_t *nvroot = NULL;
+ nvlist_t *label;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
uint64_t children, config_cache_txg = spa->spa_config_txg;
int orig_mode = spa->spa_mode;
int parse;
uint64_t obj;
+ boolean_t missing_feat_write = B_FALSE;
/*
* If this is an untrusted config, access the pool in read-only mode.
@@ -1977,19 +2057,78 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
/*
* Find the best uberblock.
*/
- vdev_uberblock_load(NULL, rvd, ub);
+ vdev_uberblock_load(rvd, ub, &label);
/*
* If we weren't able to find a single valid uberblock, return failure.
*/
- if (ub->ub_txg == 0)
+ if (ub->ub_txg == 0) {
+ nvlist_free(label);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
+ }
/*
- * If the pool is newer than the code, we can't open it.
+ * If the pool has an unsupported version we can't open it.
*/
- if (ub->ub_version > SPA_VERSION)
+ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
+ nvlist_free(label);
return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
+ }
+
+ if (ub->ub_version >= SPA_VERSION_FEATURES) {
+ nvlist_t *features;
+
+ /*
+ * If we weren't able to find what's necessary for reading the
+ * MOS in the label, return failure.
+ */
+ if (label == NULL || nvlist_lookup_nvlist(label,
+ ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
+ nvlist_free(label);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+ ENXIO));
+ }
+
+ /*
+ * Update our in-core representation with the definitive values
+ * from the label.
+ */
+ nvlist_free(spa->spa_label_features);
+ VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+ }
+
+ nvlist_free(label);
+
+ /*
+ * Look through entries in the label nvlist's features_for_read. If
+ * there is a feature listed there which we don't understand then we
+ * cannot open a pool.
+ */
+ if (ub->ub_version >= SPA_VERSION_FEATURES) {
+ nvlist_t *unsup_feat;
+
+ VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
+ 0);
+
+ for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
+ NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
+ if (!zfeature_is_supported(nvpair_name(nvp))) {
+ VERIFY(nvlist_add_string(unsup_feat,
+ nvpair_name(nvp), "") == 0);
+ }
+ }
+
+ if (!nvlist_empty(unsup_feat)) {
+ VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+ nvlist_free(unsup_feat);
+ return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+ ENOTSUP));
+ }
+
+ nvlist_free(unsup_feat);
+ }
/*
* If the vdev guid sum doesn't match the uberblock, we have an
@@ -2023,7 +2162,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa->spa_claim_max_txg = spa->spa_first_txg;
spa->spa_prev_software_version = ub->ub_software_version;
- error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
+ error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
if (error)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
@@ -2031,6 +2170,84 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (spa_version(spa) >= SPA_VERSION_FEATURES) {
+ boolean_t missing_feat_read = B_FALSE;
+ nvlist_t *unsup_feat;
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
+ &spa->spa_feat_for_read_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
+ &spa->spa_feat_for_write_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
+ &spa->spa_feat_desc_obj) != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
+ 0);
+
+ if (!feature_is_supported(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
+ unsup_feat))
+ missing_feat_read = B_TRUE;
+
+ if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
+ if (!feature_is_supported(spa->spa_meta_objset,
+ spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
+ unsup_feat))
+ missing_feat_write = B_TRUE;
+ }
+
+ if (!nvlist_empty(unsup_feat)) {
+ VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+ }
+
+ nvlist_free(unsup_feat);
+
+ if (!missing_feat_read) {
+ fnvlist_add_boolean(spa->spa_load_info,
+ ZPOOL_CONFIG_CAN_RDONLY);
+ }
+
+ /*
+ * If the state is SPA_LOAD_TRYIMPORT, our objective is
+ * twofold: to determine whether the pool is available for
+ * import in read-write mode and (if it is not) whether the
+ * pool is available for import in read-only mode. If the pool
+ * is available for import in read-write mode, it is displayed
+ * as available in userland; if it is not available for import
+ * in read-only mode, it is displayed as unavailable in
+ * userland. If the pool is available for import in read-only
+ * mode but not read-write mode, it is displayed as unavailable
+ * in userland with a special note that the pool is actually
+ * available for open in read-only mode.
+ *
+ * As a result, if the state is SPA_LOAD_TRYIMPORT and we are
+ * missing a feature for write, we must first determine whether
+ * the pool can be opened read-only before returning to
+ * userland in order to know whether to display the
+ * abovementioned note.
+ */
+ if (missing_feat_read || (missing_feat_write &&
+ spa_writeable(spa))) {
+ return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+ ENOTSUP));
+ }
+ }
+
+ spa->spa_is_initializing = B_TRUE;
+ error = dsl_pool_open(spa->spa_dsl_pool);
+ spa->spa_is_initializing = B_FALSE;
+ if (error != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
if (!mosconfig) {
uint64_t hostid;
nvlist_t *policy = NULL, *nvconfig;
@@ -2248,7 +2465,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
nvlist_free(nvconfig);
/*
- * Now that we've validate the config, check the state of the
+ * Now that we've validated the config, check the state of the
* root vdev. If it can't be opened, it indicates one or
* more toplevel vdevs are faulted.
*/
@@ -2261,6 +2478,17 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
}
}
+ if (missing_feat_write) {
+ ASSERT(state == SPA_LOAD_TRYIMPORT);
+
+ /*
+ * At this point, we know that we can open the pool in
+ * read-only mode but not read-write mode. We now have enough
+ * information and can return to userland.
+ */
+ return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
+ }
+
/*
* We've successfully opened the pool, verify that we're ready
* to start pushing transactions.
@@ -2370,10 +2598,18 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
+/*
+ * If spa_load() fails this function will try loading prior txg's. If
+ * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
+ * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
+ * function will not rewind the pool and will return the same error as
+ * spa_load().
+ */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
uint64_t max_request, int rewind_flags)
{
+ nvlist_t *loadinfo = NULL;
nvlist_t *config = NULL;
int load_error, rewind_error;
uint64_t safe_rewind_txg;
@@ -2402,9 +2638,18 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
return (load_error);
}
- /* Price of rolling back is discarding txgs, including log */
- if (state == SPA_LOAD_RECOVER)
+ if (state == SPA_LOAD_RECOVER) {
+ /* Price of rolling back is discarding txgs, including log */
spa_set_log_state(spa, SPA_LOG_CLEAR);
+ } else {
+ /*
+ * If we aren't rolling back save the load info from our first
+ * import attempt so that we can restore it after attempting
+ * to rewind.
+ */
+ loadinfo = spa->spa_load_info;
+ spa->spa_load_info = fnvlist_alloc();
+ }
spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
@@ -2428,7 +2673,20 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
if (config && (rewind_error || state != SPA_LOAD_RECOVER))
spa_config_set(spa, config);
- return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
+ if (state == SPA_LOAD_RECOVER) {
+ ASSERT3P(loadinfo, ==, NULL);
+ return (rewind_error);
+ } else {
+ /* Store the rewind info as part of the initial load info */
+ fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
+ spa->spa_load_info);
+
+ /* Restore the initial load info */
+ fnvlist_free(spa->spa_load_info);
+ spa->spa_load_info = loadinfo;
+
+ return (load_error);
+ }
}
/*
@@ -2707,8 +2965,50 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
}
}
+static void
+spa_add_feature_stats(spa_t *spa, nvlist_t *config)
+{
+ nvlist_t *features;
+ zap_cursor_t zc;
+ zap_attribute_t za;
+
+ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
+ VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ if (spa->spa_feat_for_read_obj != 0) {
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+ za.za_num_integers == 1);
+ VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
+ za.za_first_integer));
+ }
+ zap_cursor_fini(&zc);
+ }
+
+ if (spa->spa_feat_for_write_obj != 0) {
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_feat_for_write_obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+ za.za_num_integers == 1);
+ VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
+ za.za_first_integer));
+ }
+ zap_cursor_fini(&zc);
+ }
+
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
+ features) == 0);
+ nvlist_free(features);
+}
+
int
-spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
+spa_get_stats(const char *name, nvlist_t **config,
+ char *altroot, size_t buflen)
{
int error;
spa_t *spa;
@@ -2743,6 +3043,7 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
spa_add_spares(spa, *config);
spa_add_l2cache(spa, *config);
+ spa_add_feature_stats(spa, *config);
}
}
@@ -2963,6 +3264,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
uint64_t version, obj;
+ boolean_t has_features;
/*
* If this pool already exists, return failure.
@@ -2988,10 +3290,18 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
return (error);
}
- if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
- &version) != 0)
+ has_features = B_FALSE;
+ for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
+ elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
+ if (zpool_prop_feature(nvpair_name(elem)))
+ has_features = B_TRUE;
+ }
+
+ if (has_features || nvlist_lookup_uint64(props,
+ zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
version = SPA_VERSION;
- ASSERT(version <= SPA_VERSION);
+ }
+ ASSERT(SPA_VERSION_IS_SUPPORTED(version));
spa->spa_first_txg = txg;
spa->spa_uberblock.ub_txg = txg - 1;
@@ -3067,8 +3377,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_l2cache.sav_sync = B_TRUE;
}
+ spa->spa_is_initializing = B_TRUE;
spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
spa->spa_meta_objset = dp->dp_meta_objset;
+ spa->spa_is_initializing = B_FALSE;
/*
* Create DDTs (dedup tables).
@@ -3092,6 +3404,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
cmn_err(CE_PANIC, "failed to add pool config");
}
+ if (spa_version(spa) >= SPA_VERSION_FEATURES)
+ spa_feature_create_zap_objects(spa, tx);
+
if (zap_add(spa->spa_meta_objset,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
sizeof (uint64_t), 1, &version, tx) != 0) {
@@ -3283,7 +3598,7 @@ spa_import_rootpool(char *devpath, char *devid)
}
#endif
if (config == NULL) {
- cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
+ cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
devpath);
return (EIO);
}
@@ -3603,6 +3918,8 @@ spa_tryimport(nvlist_t *tryconfig)
state) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
spa->spa_uberblock.ub_timestamp) == 0);
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info) == 0);
/*
* If the bootfs property exists on this pool then we
@@ -5328,7 +5645,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
* information. This avoids the dbuf_will_dirty() path and
* saves us a pre-read to get data we don't actually care about.
*/
- bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
+ bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
packed = kmem_alloc(bufsize, KM_SLEEP);
VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
@@ -5413,6 +5730,24 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}
+static void
+spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ spa_t *spa = arg1;
+ uint64_t version = *(uint64_t *)arg2;
+
+ /*
+ * Setting the version is special cased when first creating the pool.
+ */
+ ASSERT(tx->tx_txg != TXG_INITIAL);
+
+ ASSERT(version <= SPA_VERSION);
+ ASSERT(version >= spa_version(spa));
+
+ spa->spa_uberblock.ub_version = version;
+ vdev_config_dirty(spa->spa_root_vdev);
+}
+
/*
* Set zpool properties.
*/
@@ -5422,32 +5757,38 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_t *spa = arg1;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
- nvpair_t *elem;
- uint64_t intval;
- char *strval;
- zpool_prop_t prop;
- const char *propname;
- zprop_type_t proptype;
+ nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
- elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem))) {
+ uint64_t intval;
+ char *strval, *fname;
+ zpool_prop_t prop;
+ const char *propname;
+ zprop_type_t proptype;
+ zfeature_info_t *feature;
+
switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
+ case ZPROP_INVAL:
+ /*
+ * We checked this earlier in spa_prop_validate().
+ */
+ ASSERT(zpool_prop_feature(nvpair_name(elem)));
+
+ fname = strchr(nvpair_name(elem), '@') + 1;
+ VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
+
+ spa_feature_enable(spa, feature, tx);
+ break;
+
case ZPOOL_PROP_VERSION:
+ VERIFY(nvpair_value_uint64(elem, &intval) == 0);
/*
- * Only set version for non-zpool-creation cases
- * (set/import). spa_create() needs special care
- * for version setting.
+ * The version is synced seperatly before other
+ * properties and should be correct by now.
*/
- if (tx->tx_txg != TXG_INITIAL) {
- VERIFY(nvpair_value_uint64(elem,
- &intval) == 0);
- ASSERT(intval <= SPA_VERSION);
- ASSERT(intval >= spa_version(spa));
- spa->spa_uberblock.ub_version = intval;
- vdev_config_dirty(spa->spa_root_vdev);
- }
+ ASSERT3U(spa_version(spa), >=, intval);
break;
case ZPOOL_PROP_ALTROOT:
@@ -5484,14 +5825,10 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
* Set pool property values in the poolprops mos object.
*/
if (spa->spa_pool_props_object == 0) {
- VERIFY((spa->spa_pool_props_object =
- zap_create(mos, DMU_OT_POOL_PROPS,
- DMU_OT_NONE, 0, tx)) > 0);
-
- VERIFY(zap_update(mos,
+ spa->spa_pool_props_object =
+ zap_create_link(mos, DMU_OT_POOL_PROPS,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
- 8, 1, &spa->spa_pool_props_object, tx)
- == 0);
+ tx);
}
/* normalize the property name */
@@ -5590,6 +5927,11 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
/* Keeping the freedir open increases spa_minref */
spa->spa_minref += 3;
}
+
+ if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
+ spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
+ spa_feature_create_zap_objects(spa, tx);
+ }
}
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
index a072233..3dae0ba 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -35,6 +35,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/utsname.h>
#include <sys/sunddi.h>
+#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/kobj.h>
#include <sys/zone.h>
@@ -407,6 +408,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
+ /*
+ * Store what's necessary for reading the MOS in the label.
+ */
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+ spa->spa_label_features) == 0);
+
if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
ddt_histogram_t *ddh;
ddt_stat_t *dds;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 6342452..83c3d31 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -48,6 +48,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include "zfs_prop.h"
+#include "zfeature_common.h"
/*
* SPA locking
@@ -216,7 +217,7 @@
* Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*
- * spa_rename() is also implemented within this file since is requires
+ * spa_rename() is also implemented within this file since it requires
* manipulation of the namespace.
*/
@@ -487,8 +488,22 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
KM_SLEEP) == 0);
- if (config != NULL)
+ if (config != NULL) {
+ nvlist_t *features;
+
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+ &features) == 0) {
+ VERIFY(nvlist_dup(features, &spa->spa_label_features,
+ 0) == 0);
+ }
+
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
+ }
+
+ if (spa->spa_label_features == NULL) {
+ VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
+ KM_SLEEP) == 0);
+ }
return (spa);
}
@@ -525,6 +540,7 @@ spa_remove(spa_t *spa)
list_destroy(&spa->spa_config_list);
+ nvlist_free(spa->spa_label_features);
nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
@@ -1033,6 +1049,20 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
* ==========================================================================
*/
+void
+spa_activate_mos_feature(spa_t *spa, const char *feature)
+{
+ (void) nvlist_add_boolean(spa->spa_label_features, feature);
+ vdev_config_dirty(spa->spa_root_vdev);
+}
+
+void
+spa_deactivate_mos_feature(spa_t *spa, const char *feature)
+{
+ (void) nvlist_remove_all(spa->spa_label_features, feature);
+ vdev_config_dirty(spa->spa_root_vdev);
+}
+
/*
* Rename a spa_t.
*/
@@ -1183,12 +1213,22 @@ spa_generate_guid(spa_t *spa)
void
sprintf_blkptr(char *buf, const blkptr_t *bp)
{
- char *type = NULL;
+ char type[256];
char *checksum = NULL;
char *compress = NULL;
if (bp != NULL) {
- type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
+ if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
+ dmu_object_byteswap_t bswap =
+ DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
+ (void) snprintf(type, sizeof (type), "bswap %s %s",
+ DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
+ "metadata" : "data",
+ dmu_ot_byteswap[bswap].ob_name);
+ } else {
+ (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
+ sizeof (type));
+ }
checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
@@ -1270,6 +1310,12 @@ spa_get_dsl(spa_t *spa)
return (spa->spa_dsl_pool);
}
+boolean_t
+spa_is_initializing(spa_t *spa)
+{
+ return (spa->spa_is_initializing);
+}
+
blkptr_t *
spa_get_rootblkptr(spa_t *spa)
{
@@ -1553,6 +1599,7 @@ spa_init(int mode)
vdev_cache_stat_init();
zfs_prop_init();
zpool_prop_init();
+ zpool_feature_init();
spa_config_load();
l2arc_start();
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bptree.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bptree.h
new file mode 100644
index 0000000..9715072
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bptree.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_BPTREE_H
+#define _SYS_BPTREE_H
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bptree_phys {
+ uint64_t bt_begin;
+ uint64_t bt_end;
+ uint64_t bt_bytes;
+ uint64_t bt_comp;
+ uint64_t bt_uncomp;
+} bptree_phys_t;
+
+typedef struct bptree_entry_phys {
+ blkptr_t be_bp;
+ uint64_t be_birth_txg; /* only delete blocks born after this txg */
+ zbookmark_t be_zb; /* holds traversal resume point if needed */
+} bptree_entry_phys_t;
+
+typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+
+uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
+int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
+
+void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
+ uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);
+
+int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free,
+ bptree_itor_t func, void *arg, dmu_tx_t *tx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BPTREE_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index 4d24f57..c6943e1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -18,11 +18,10 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
- */
-/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@@ -75,6 +74,53 @@ typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
+typedef enum dmu_object_byteswap {
+ DMU_BSWAP_UINT8,
+ DMU_BSWAP_UINT16,
+ DMU_BSWAP_UINT32,
+ DMU_BSWAP_UINT64,
+ DMU_BSWAP_ZAP,
+ DMU_BSWAP_DNODE,
+ DMU_BSWAP_OBJSET,
+ DMU_BSWAP_ZNODE,
+ DMU_BSWAP_OLDACL,
+ DMU_BSWAP_ACL,
+ /*
+ * Allocating a new byteswap type number makes the on-disk format
+ * incompatible with any other format that uses the same number.
+ *
+ * Data can usually be structured to work with one of the
+ * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
+ */
+ DMU_BSWAP_NUMFUNCS
+} dmu_object_byteswap_t;
+
+#define DMU_OT_NEWTYPE 0x80
+#define DMU_OT_METADATA 0x40
+#define DMU_OT_BYTESWAP_MASK 0x3f
+
+/*
+ * Defines a uint8_t object type. Object types specify if the data
+ * in the object is metadata (boolean) and how to byteswap the data
+ * (dmu_object_byteswap_t).
+ */
+#define DMU_OT(byteswap, metadata) \
+ (DMU_OT_NEWTYPE | \
+ ((metadata) ? DMU_OT_METADATA : 0) | \
+ ((byteswap) & DMU_OT_BYTESWAP_MASK))
+
+#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
+ (ot) < DMU_OT_NUMTYPES)
+
+#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_METADATA) : \
+ dmu_ot[(ot)].ot_metadata)
+
+#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_BYTESWAP_MASK) : \
+ dmu_ot[(ot)].ot_byteswap)
+
typedef enum dmu_object_type {
DMU_OT_NONE,
/* general: */
@@ -139,7 +185,35 @@ typedef enum dmu_object_type {
DMU_OT_DEADLIST_HDR, /* UINT64 */
DMU_OT_DSL_CLONES, /* ZAP */
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
- DMU_OT_NUMTYPES
+ /*
+ * Do not allocate new object types here. Doing so makes the on-disk
+ * format incompatible with any other format that uses the same object
+ * type number.
+ *
+ * When creating an object which does not have one of the above types
+ * use the DMU_OTN_* type with the correct byteswap and metadata
+ * values.
+ *
+ * The DMU_OTN_* types do not have entries in the dmu_ot table,
+ * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead
+ * of indexing into dmu_ot directly (this works for both DMU_OT_* types
+ * and DMU_OTN_* types).
+ */
+ DMU_OT_NUMTYPES,
+
+ /*
+ * Names for valid types declared with DMU_OT().
+ */
+ DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
+ DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
+ DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
+ DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
+ DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
+ DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
+ DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
+ DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
+ DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
+ DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
typedef enum dmu_objset_type {
@@ -221,6 +295,9 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
+#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
+#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
+#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
@@ -236,6 +313,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
+#define DMU_POOL_BPTREE_OBJ "bptree_obj"
/*
* Allocate an object from this objset. The range of object numbers
@@ -496,7 +574,7 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
/*
* Free up the data blocks for a defined range of a file. If size is
- * zero, the range from offset to end-of-file is freed.
+ * -1, the range from offset to end-of-file is freed.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
@@ -566,12 +644,18 @@ typedef struct dmu_object_info {
typedef void arc_byteswap_func_t(void *buf, size_t size);
typedef struct dmu_object_type_info {
- arc_byteswap_func_t *ot_byteswap;
+ dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
+typedef struct dmu_object_byteswap_info {
+ arc_byteswap_func_t *ob_func;
+ char *ob_name;
+} dmu_object_byteswap_info_t;
+
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
+extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
/*
* Get information on a DMU object.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
index 5b326cd..3cbf42f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@@ -54,6 +55,9 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
+int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
+ uint64_t txg_start, zbookmark_t *resume, int flags,
+ blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
index 56d2f6b..e3affeb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@@ -88,7 +88,12 @@ typedef struct dsl_dataset_phys {
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
- uint64_t ds_used_bytes;
+ /*
+ * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
+ * include all blocks referenced by this dataset, including those
+ * shared with any other datasets.
+ */
+ uint64_t ds_referenced_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
uint64_t ds_unique_bytes; /* only relevant to snapshots */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
index 57725b5..e2c12ab 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@@ -34,6 +35,7 @@
#include <sys/ddt.h>
#include <sys/arc.h>
#include <sys/bpobj.h>
+#include <sys/bptree.h>
#ifdef __cplusplus
extern "C" {
@@ -48,7 +50,8 @@ struct dsl_scan;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_TOTAL DMU_OT_NUMTYPES
+#define DMU_OT_OTHER DMU_OT_NUMTYPES /* place holder for DMU_OT() types */
+#define DMU_OT_TOTAL (DMU_OT_NUMTYPES + 1)
typedef struct zfs_blkstat {
uint64_t zb_count;
@@ -85,6 +88,7 @@ typedef struct dsl_pool {
uint64_t dp_write_limit;
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
+ uint64_t dp_bptree_obj;
struct dsl_scan *dp_scan;
@@ -110,7 +114,8 @@ typedef struct dsl_pool {
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
-int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
+int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
+int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
index c79666e..5691f4d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -79,6 +80,9 @@ typedef struct dsl_scan {
uint64_t scn_sync_start_time;
zio_t *scn_zio_root;
+ /* for freeing blocks */
+ boolean_t scn_is_bptree;
+
/* for debugging / information */
uint64_t scn_visited_this_txg;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index a2a76e3..ec6914e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -94,7 +94,7 @@ struct dsl_pool;
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
-#define SPA_CONFIG_BLOCKSIZE (1 << 14)
+#define SPA_CONFIG_BLOCKSIZE (1ULL << 14)
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
@@ -262,7 +262,7 @@ typedef struct blkptr {
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
- ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
+ ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
@@ -403,8 +403,8 @@ typedef struct blkptr {
#include <sys/dmu.h>
#define BP_GET_BUFC_TYPE(bp) \
- (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA);
+ (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
+ ARC_BUFC_METADATA : ARC_BUFC_DATA)
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
@@ -415,8 +415,8 @@ typedef enum spa_import_type {
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
-extern int spa_get_stats(const char *pool, nvlist_t **config,
- char *altroot, size_t buflen);
+extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
+ size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str, nvlist_t *zplprops);
extern int spa_import_rootpool(char *devpath, char *devid);
@@ -573,6 +573,7 @@ extern void spa_claim_notify(zio_t *zio);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
+extern boolean_t spa_is_initializing(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
@@ -604,6 +605,8 @@ extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
/* Miscellaneous support routines */
+extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
+extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
extern int spa_rename(const char *oldname, const char *newname);
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index 88d1477..b427674 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -127,6 +127,7 @@ struct spa {
uint64_t spa_import_flags; /* import specific flags */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
+ boolean_t spa_is_initializing; /* true while opening pool */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
@@ -144,6 +145,7 @@ struct spa {
list_t spa_state_dirty_list; /* vdevs with dirty state */
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
+ nvlist_t *spa_label_features; /* Features for reading MOS */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
uint64_t spa_syncing_txg; /* txg currently syncing */
@@ -220,7 +222,10 @@ struct spa {
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
- uint64_t spa_prev_software_version;
+ uint64_t spa_prev_software_version; /* See ub_software_version */
+ uint64_t spa_feat_for_write_obj; /* required to write to pool */
+ uint64_t spa_feat_for_read_obj; /* required to read from pool */
+ uint64_t spa_feat_desc_obj; /* Feature descriptions */
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index aa6559c..2329d5b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
@@ -141,8 +142,8 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psise, uint64_t offset);
-extern nvlist_t *vdev_label_read_config(vdev_t *vd);
-extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
+extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
+extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
index 019d2be..7eed4a1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
@@ -202,7 +202,7 @@ struct vdev {
* For DTrace to work in userland (libzpool) context, these fields must
* remain at the end of the structure. DTrace will use the kernel's
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
- * larger in userland, the offsets for the rest fields would be
+ * larger in userland, the offsets for the rest of the fields would be
* incorrect.
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
@@ -257,6 +257,7 @@ typedef struct vdev_label {
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
#define VDEV_LABELS 4
+#define VDEV_BEST_LABEL VDEV_LABELS
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
index a1130bb..4d7b315 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZAP_H
@@ -132,6 +133,8 @@ uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
+ uint64_t parent_obj, const char *name, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
@@ -300,12 +303,6 @@ int zap_add_int_key(objset_t *os, uint64_t obj,
int zap_lookup_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t *valuep);
-/*
- * They name is a stringified version of key; increment its value by
- * delta. Zero values will be zap_remove()-ed.
- */
-int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
- dmu_tx_t *tx);
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
dmu_tx_t *tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
new file mode 100644
index 0000000..9ff1c93
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_ZFEATURE_H
+#define _SYS_ZFEATURE_H
+
+#include <sys/dmu.h>
+#include <sys/nvpair.h>
+#include "zfeature_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
+ uint64_t desc_obj, nvlist_t *unsup_feat);
+
+struct spa;
+extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
+extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
+extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFEATURE_H */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 4a4e843..80d9336 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _ZIO_H
@@ -270,6 +271,14 @@ typedef struct zbookmark {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
+#define ZB_IS_ZERO(zb) \
+ ((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
+ (zb)->zb_level == 0 && (zb)->zb_blkid == 0)
+#define ZB_IS_ROOT(zb) \
+ ((zb)->zb_object == ZB_ROOT_OBJECT && \
+ (zb)->zb_level == ZB_ROOT_LEVEL && \
+ (zb)->zb_blkid == ZB_ROOT_BLKID)
+
typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
@@ -287,6 +296,7 @@ typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
+struct dnode_phys;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
@@ -559,6 +569,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
+/* zbookmark functions */
+boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
+ const zbookmark_t *zb1, const zbookmark_t *zb2);
+
#ifdef __cplusplus
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index ffcefb9..950f2f1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -1329,7 +1329,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
uint64_t aux_guid = 0;
nvlist_t *nvl;
- if ((label = vdev_label_read_config(vd)) == NULL) {
+ if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
+ NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LABEL);
return (0);
@@ -1970,14 +1971,14 @@ vdev_validate_aux(vdev_t *vd)
if (!vdev_readable(vd))
return (0);
- if ((label = vdev_label_read_config(vd)) == NULL) {
+ if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
return (-1);
}
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
- version > SPA_VERSION ||
+ !SPA_VERSION_IS_SUPPORTED(version) ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
guid != vd->vdev_guid ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
index c08ed8b..b943647 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
@@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -121,6 +123,8 @@
* txg Transaction group in which this label was written
* pool_guid Unique identifier for this pool
* vdev_tree An nvlist describing vdev tree.
+ * features_for_read
+ * An nvlist of the features necessary for reading the MOS.
*
* Each leaf device label also contains the following:
*
@@ -428,8 +432,13 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
}
+/*
+ * Returns the configuration from the label of the given vdev. If 'label' is
+ * VDEV_BEST_LABEL, each label of the vdev will be read until a valid
+ * configuration is found; otherwise, only the specified label will be read.
+ */
nvlist_t *
-vdev_label_read_config(vdev_t *vd)
+vdev_label_read_config(vdev_t *vd, int label)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *config = NULL;
@@ -447,6 +456,8 @@ vdev_label_read_config(vdev_t *vd)
retry:
for (int l = 0; l < VDEV_LABELS; l++) {
+ if (label >= 0 && label < VDEV_LABELS && label != l)
+ continue;
zio = zio_root(spa, NULL, NULL, flags);
@@ -496,7 +507,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
/*
* Read the label, if any, and perform some basic sanity checks.
*/
- if ((label = vdev_label_read_config(vd)) == NULL)
+ if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
return (B_FALSE);
(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@@ -833,7 +844,7 @@ retry:
* come back up, we fail to see the uberblock for txg + 1 because, say,
* it was on a mirrored device and the replica to which we wrote txg + 1
* is now offline. If we then make some changes and sync txg + 1, and then
- * the missing replica comes back, then for a new seconds we'll have two
+ * the missing replica comes back, then for a few seconds we'll have two
* conflicting uberblocks on disk with the same txg. The solution is simple:
* among uberblocks with equal txg, choose the one with the latest timestamp.
*/
@@ -853,46 +864,50 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
return (0);
}
+struct ubl_cbdata {
+ uberblock_t *ubl_ubbest; /* Best uberblock */
+ vdev_t *ubl_vd; /* vdev associated with the above */
+ int ubl_label; /* Label associated with the above */
+};
+
static void
vdev_uberblock_load_done(zio_t *zio)
{
+ vdev_t *vd = zio->io_vd;
spa_t *spa = zio->io_spa;
zio_t *rio = zio->io_private;
uberblock_t *ub = zio->io_data;
- uberblock_t *ubbest = rio->io_private;
+ struct ubl_cbdata *cbp = rio->io_private;
- ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
+ ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
mutex_enter(&rio->io_lock);
if (ub->ub_txg <= spa->spa_load_max_txg &&
- vdev_uberblock_compare(ub, ubbest) > 0)
- *ubbest = *ub;
+ vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
+ /*
+ * Keep track of the vdev and label in which this
+ * uberblock was found. We will use this information
+ * later to obtain the config nvlist associated with
+ * this uberblock.
+ */
+ *cbp->ubl_ubbest = *ub;
+ cbp->ubl_vd = vd;
+ cbp->ubl_label = vdev_label_number(vd->vdev_psize,
+ zio->io_offset);
+ }
mutex_exit(&rio->io_lock);
}
zio_buf_free(zio->io_data, zio->io_size);
}
-void
-vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
+static void
+vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
+ struct ubl_cbdata *cbp)
{
- spa_t *spa = vd->vdev_spa;
- vdev_t *rvd = spa->spa_root_vdev;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
-
- if (vd == rvd) {
- ASSERT(zio == NULL);
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- zio = zio_root(spa, NULL, ubbest, flags);
- bzero(ubbest, sizeof (uberblock_t));
- }
-
- ASSERT(zio != NULL);
-
for (int c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
+ vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
for (int l = 0; l < VDEV_LABELS; l++) {
@@ -905,11 +920,45 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
}
}
}
+}
- if (vd == rvd) {
- (void) zio_wait(zio);
- spa_config_exit(spa, SCL_ALL, FTAG);
+/*
+ * Reads the 'best' uberblock from disk along with its associated
+ * configuration. First, we read the uberblock array of each label of each
+ * vdev, keeping track of the uberblock with the highest txg in each array.
+ * Then, we read the configuration from the same label as the best uberblock.
+ */
+void
+vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
+{
+ int i;
+ zio_t *zio;
+ spa_t *spa = rvd->vdev_spa;
+ struct ubl_cbdata cb;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
+
+ ASSERT(ub);
+ ASSERT(config);
+
+ bzero(ub, sizeof (uberblock_t));
+ *config = NULL;
+
+ cb.ubl_ubbest = ub;
+ cb.ubl_vd = NULL;
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ zio = zio_root(spa, NULL, &cb, flags);
+ vdev_uberblock_load_impl(zio, rvd, flags, &cb);
+ (void) zio_wait(zio);
+ if (cb.ubl_vd != NULL) {
+ for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) {
+ *config = vdev_label_read_config(cb.ubl_vd, i);
+ if (*config != NULL)
+ break;
+ }
}
+ spa_config_exit(spa, SCL_ALL, FTAG);
}
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
index 288a4d9..fa1d99f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -946,6 +947,19 @@ fzap_prefetch(zap_name_t *zn)
* Helper functions for consumers.
*/
+uint64_t
+zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
+ const char *name, dmu_tx_t *tx)
+{
+ uint64_t new_obj;
+
+ VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
+ VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
+ tx) == 0);
+
+ return (new_obj);
+}
+
int
zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
char *name)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
index 6e506a4..dfdce04 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zio.h>
@@ -472,7 +472,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
- ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
+ ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
}
#endif
@@ -596,7 +596,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
- ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
+ ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
}
#endif
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
new file mode 100644
index 0000000..ba72208
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c
@@ -0,0 +1,414 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zfeature.h>
+#include <sys/dmu.h>
+#include <sys/nvpair.h>
+#include <sys/zap.h>
+#include <sys/dmu_tx.h>
+#include "zfeature_common.h"
+#include <sys/spa_impl.h>
+
+/*
+ * ZFS Feature Flags
+ * -----------------
+ *
+ * ZFS feature flags are used to provide fine-grained versioning to the ZFS
+ * on-disk format. Once enabled on a pool feature flags replace the old
+ * spa_version() number.
+ *
+ * Each new on-disk format change will be given a uniquely identifying string
+ * guid rather than a version number. This avoids the problem of different
+ * organizations creating new on-disk formats with the same version number. To
+ * keep feature guids unique they should consist of the reverse dns name of the
+ * organization which implemented the feature and a short name for the feature,
+ * separated by a colon (e.g. com.delphix:async_destroy).
+ *
+ * Reference Counts
+ * ----------------
+ *
+ * Within each pool features can be in one of three states: disabled, enabled,
+ * or active. These states are differentiated by a reference count stored on
+ * disk for each feature:
+ *
+ * 1) If there is no reference count stored on disk the feature is disabled.
+ * 2) If the reference count is 0 a system administrator has enabled the
+ * feature, but the feature has not been used yet, so no on-disk
+ * format changes have been made.
+ * 3) If the reference count is greater than 0 the feature is active.
+ * The format changes required by the feature are currently on disk.
+ * Note that if the feature's format changes are reversed the feature
+ * may choose to set its reference count back to 0.
+ *
+ * Feature flags makes no differentiation between non-zero reference counts
+ * for an active feature (e.g. a reference count of 1 means the same thing as a
+ * reference count of 27834721), but feature implementations may choose to use
+ * the reference count to store meaningful information. For example, a new RAID
+ * implementation might set the reference count to the number of vdevs using
+ * it. If all those disks are removed from the pool the feature goes back to
+ * having a reference count of 0.
+ *
+ * It is the responsibility of the individual features to maintain a non-zero
+ * reference count as long as the feature's format changes are present on disk.
+ *
+ * Dependencies
+ * ------------
+ *
+ * Each feature may depend on other features. The only effect of this
+ * relationship is that when a feature is enabled all of its dependencies are
+ * automatically enabled as well. Any future work to support disabling of
+ * features would need to ensure that features cannot be disabled if other
+ * enabled features depend on them.
+ *
+ * On-disk Format
+ * --------------
+ *
+ * When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES
+ * (5000). In order for this to work the pool is automatically upgraded to
+ * SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature flags on disk
+ * format changes will be in use.
+ *
+ * Information about features is stored in 3 ZAP objects in the pool's MOS.
+ * These objects are linked to by the following names in the pool directory
+ * object:
+ *
+ * 1) features_for_read: feature guid -> reference count
+ * Features needed to open the pool for reading.
+ * 2) features_for_write: feature guid -> reference count
+ * Features needed to open the pool for writing.
+ * 3) feature_descriptions: feature guid -> descriptive string
+ * A human readable string.
+ *
+ * All enabled features appear in either features_for_read or
+ * features_for_write, but not both.
+ *
+ * To open a pool in read-only mode only the features listed in
+ * features_for_read need to be supported.
+ *
+ * To open the pool in read-write mode features in both features_for_read and
+ * features_for_write need to be supported.
+ *
+ * Some features may be required to read the ZAP objects containing feature
+ * information. To allow software to check for compatibility with these features
+ * before the pool is opened their names must be stored in the label in a
+ * new "features_for_read" entry (note that features that are only required
+ * to write to a pool never need to be stored in the label since the
+ * features_for_write ZAP object can be read before the pool is written to).
+ * To save space in the label features must be explicitly marked as needing to
+ * be written to the label. Also, reference counts are not stored in the label,
+ * instead any feature whose reference count drops to 0 is removed from the
+ * label.
+ *
+ * Adding New Features
+ * -------------------
+ *
+ * Features must be registered in zpool_feature_init() function in
+ * zfeature_common.c using the zfeature_register() function. This function
+ * has arguments to specify if the feature should be stored in the
+ * features_for_read or features_for_write ZAP object and if it needs to be
+ * written to the label when active.
+ *
+ * Once a feature is registered it will appear as a "feature@<feature name>"
+ * property which can be set by an administrator. Feature implementors should
+ * use the spa_feature_is_enabled() and spa_feature_is_active() functions to
+ * query the state of a feature and the spa_feature_incr() and
+ * spa_feature_decr() functions to change an enabled feature's reference count.
+ * Reference counts may only be updated in the syncing context.
+ *
+ * Features may not perform enable-time initialization. Instead, any such
+ * initialization should occur when the feature is first used. This design
+ * enforces that on-disk changes be made only when features are used. Code
+ * should only check if a feature is enabled using spa_feature_is_enabled(),
+ * not by relying on any feature specific metadata existing. If a feature is
+ * enabled, but the feature's metadata is not on disk yet then it should be
+ * created as needed.
+ *
+ * As an example, consider the com.delphix:async_destroy feature. This feature
+ * relies on the existence of a bptree in the MOS that store blocks for
+ * asynchronous freeing. This bptree is not created when async_destroy is
+ * enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is
+ * called to check if async_destroy is enabled. If it is and the bptree object
+ * does not exist yet, the bptree object is created as part of the dataset
+ * destroy and async_destroy's reference count is incremented to indicate it
+ * has made an on-disk format change. Later, after the destroyed dataset's
+ * blocks have all been asynchronously freed there is no longer any use for the
+ * bptree object, so it is destroyed and async_destroy's reference count is
+ * decremented back to 0 to indicate that it has undone its on-disk format
+ * changes.
+ */
+
+typedef enum {
+ FEATURE_ACTION_ENABLE,
+ FEATURE_ACTION_INCR,
+ FEATURE_ACTION_DECR,
+} feature_action_t;
+
+/*
+ * Checks that the features active in the specified object are supported by
+ * this software. Adds each unsupported feature (name -> description) to
+ * the supplied nvlist.
+ */
+boolean_t
+feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj,
+ nvlist_t *unsup_feat)
+{
+ boolean_t supported;
+ zap_cursor_t zc;
+ zap_attribute_t za;
+
+ supported = B_TRUE;
+ for (zap_cursor_init(&zc, os, obj);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+ za.za_num_integers == 1);
+
+ if (za.za_first_integer != 0 &&
+ !zfeature_is_supported(za.za_name)) {
+ supported = B_FALSE;
+
+ if (unsup_feat != NULL) {
+ char *desc = "";
+ char buf[MAXPATHLEN];
+
+ if (zap_lookup(os, desc_obj, za.za_name,
+ 1, sizeof (buf), buf) == 0)
+ desc = buf;
+
+ VERIFY(nvlist_add_string(unsup_feat, za.za_name,
+ desc) == 0);
+ }
+ }
+ }
+ zap_cursor_fini(&zc);
+
+ return (supported);
+}
+
+static int
+feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
+ zfeature_info_t *feature, uint64_t *res)
+{
+ int err;
+ uint64_t refcount;
+ uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
+
+ ASSERT(0 != zapobj);
+
+ err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
+ &refcount);
+ if (err != 0) {
+ if (err == ENOENT)
+ return (ENOTSUP);
+ else
+ return (err);
+ }
+ *res = refcount;
+ return (0);
+}
+
+static int
+feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj,
+ uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action,
+ dmu_tx_t *tx)
+{
+ int error;
+ uint64_t refcount;
+ uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
+
+ ASSERT(0 != zapobj);
+ ASSERT(zfeature_is_valid_guid(feature->fi_guid));
+
+ error = zap_lookup(os, zapobj, feature->fi_guid,
+ sizeof (uint64_t), 1, &refcount);
+
+ /*
+ * If we can't ascertain the status of the specified feature, an I/O
+ * error occurred.
+ */
+ if (error != 0 && error != ENOENT)
+ return (error);
+
+ switch (action) {
+ case FEATURE_ACTION_ENABLE:
+ /*
+ * If the feature is already enabled, ignore the request.
+ */
+ if (error == 0)
+ return (0);
+ refcount = 0;
+ break;
+ case FEATURE_ACTION_INCR:
+ if (error == ENOENT)
+ return (ENOTSUP);
+ if (refcount == UINT64_MAX)
+ return (EOVERFLOW);
+ refcount++;
+ break;
+ case FEATURE_ACTION_DECR:
+ if (error == ENOENT)
+ return (ENOTSUP);
+ if (refcount == 0)
+ return (EOVERFLOW);
+ refcount--;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (action == FEATURE_ACTION_ENABLE) {
+ int i;
+
+ for (i = 0; feature->fi_depends[i] != NULL; i++) {
+ zfeature_info_t *dep = feature->fi_depends[i];
+
+ error = feature_do_action(os, read_obj, write_obj,
+ desc_obj, dep, FEATURE_ACTION_ENABLE, tx);
+ if (error != 0)
+ return (error);
+ }
+ }
+
+ error = zap_update(os, zapobj, feature->fi_guid,
+ sizeof (uint64_t), 1, &refcount, tx);
+ if (error != 0)
+ return (error);
+
+ if (action == FEATURE_ACTION_ENABLE) {
+ error = zap_update(os, desc_obj,
+ feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
+ feature->fi_desc, tx);
+ if (error != 0)
+ return (error);
+ }
+
+ if (action == FEATURE_ACTION_INCR && refcount == 1 && feature->fi_mos) {
+ spa_activate_mos_feature(dmu_objset_spa(os), feature->fi_guid);
+ }
+
+ if (action == FEATURE_ACTION_DECR && refcount == 0) {
+ spa_deactivate_mos_feature(dmu_objset_spa(os),
+ feature->fi_guid);
+ }
+
+ return (0);
+}
+
+void
+spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
+{
+ /*
+ * We create feature flags ZAP objects in two instances: during pool
+ * creation and during pool upgrade.
+ */
+ ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
+ tx->tx_txg == TXG_INITIAL));
+
+ spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FEATURES_FOR_READ, tx);
+ spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FEATURES_FOR_WRITE, tx);
+ spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_FEATURE_DESCRIPTIONS, tx);
+}
+
+/*
+ * Enable any required dependencies, then enable the requested feature.
+ */
+void
+spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx));
+}
+
+/*
+ * If the specified feature has not yet been enabled, this function returns
+ * ENOTSUP; otherwise, this function increments the feature's refcount (or
+ * returns EOVERFLOW if the refcount cannot be incremented). This function must
+ * be called from syncing context.
+ */
+void
+spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx));
+}
+
+/*
+ * If the specified feature has not yet been enabled, this function returns
+ * ENOTSUP; otherwise, this function decrements the feature's refcount (or
+ * returns EOVERFLOW if the refcount is already 0). This function must
+ * be called from syncing context.
+ */
+void
+spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx));
+}
+
+boolean_t
+spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature)
+{
+ int err;
+ uint64_t refcount;
+
+ if (spa_version(spa) < SPA_VERSION_FEATURES)
+ return (B_FALSE);
+
+ err = feature_get_refcount(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ feature, &refcount);
+ ASSERT(err == 0 || err == ENOTSUP);
+ return (err == 0);
+}
+
+boolean_t
+spa_feature_is_active(spa_t *spa, zfeature_info_t *feature)
+{
+ int err;
+ uint64_t refcount;
+
+ if (spa_version(spa) < SPA_VERSION_FEATURES)
+ return (B_FALSE);
+
+ err = feature_get_refcount(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ feature, &refcount);
+ ASSERT(err == 0 || err == ENOTSUP);
+ return (err == 0 && refcount > 0);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
index 7c75aca..73a4f75 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
@@ -1157,6 +1158,8 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
/*
* Find a zfsvfs_t for a mounted filesystem, or create our own, in which
* case its z_vfs will be NULL, and it will be opened as the owner.
+ * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
+ * which prevents all vnode ops from running.
*/
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
@@ -1220,7 +1223,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
(void) nvlist_lookup_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
- if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
+ if (!SPA_VERSION_IS_SUPPORTED(version)) {
error = EINVAL;
goto pool_props_bad;
}
@@ -1344,6 +1347,15 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
return (error);
}
+/*
+ * inputs:
+ * zc_name name of the pool
+ *
+ * outputs:
+ * zc_cookie real errno
+ * zc_nvlist_dst config nvlist
+ * zc_nvlist_dst_size size of config nvlist
+ */
static int
zfs_ioc_pool_stats(zfs_cmd_t *zc)
{
@@ -1445,7 +1457,8 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
- if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
+ if (zc->zc_cookie < spa_version(spa) ||
+ !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
spa_close(spa, FTAG);
return (EINVAL);
}
@@ -5452,7 +5465,7 @@ zfs_modevent(module_t mod, int type, void *unused __unused)
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, NULL);
- printf("ZFS storage pool version " SPA_VERSION_STRING "\n");
+ printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
root_mount_rel(zfs_root_token);
zfsdev_init();
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 467b6a6..01eddbd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -2278,7 +2278,7 @@ void
zfs_init(void)
{
- printf("ZFS filesystem version " ZPL_VERSION_STRING "\n");
+ printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
/*
* Initialize .zfs directory structures
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 694302e..ca402f6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -640,7 +640,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
zp->zp_compress >= ZIO_COMPRESS_OFF &&
zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
- zp->zp_type < DMU_OT_NUMTYPES &&
+ DMU_OT_IS_VALID(zp->zp_type) &&
zp->zp_level < 32 &&
zp->zp_copies > 0 &&
zp->zp_copies <= spa_max_replication(spa) &&
@@ -924,7 +924,7 @@ zio_read_bp_init(zio_t *zio)
zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
}
- if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
+ if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
@@ -3015,3 +3015,45 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_checksum_verify,
zio_done
};
+
+/* dnp is the dnode for zb1->zb_object */
+boolean_t
+zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
+ const zbookmark_t *zb2)
+{
+ uint64_t zb1nextL0, zb2thisobj;
+
+ ASSERT(zb1->zb_objset == zb2->zb_objset);
+ ASSERT(zb2->zb_level == 0);
+
+ /*
+ * A bookmark in the deadlist is considered to be after
+ * everything else.
+ */
+ if (zb2->zb_object == DMU_DEADLIST_OBJECT)
+ return (B_TRUE);
+
+ /* The objset_phys_t isn't before anything. */
+ if (dnp == NULL)
+ return (B_FALSE);
+
+ zb1nextL0 = (zb1->zb_blkid + 1) <<
+ ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+
+ zb2thisobj = zb2->zb_object ? zb2->zb_object :
+ zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+
+ if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
+ uint64_t nextobj = zb1nextL0 *
+ (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
+ return (nextobj <= zb2thisobj);
+ }
+
+ if (zb1->zb_object < zb2thisobj)
+ return (B_TRUE);
+ if (zb1->zb_object > zb2thisobj)
+ return (B_FALSE);
+ if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+ return (B_FALSE);
+ return (zb1nextL0 <= zb2->zb_blkid);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
index 1b23dc2..f2d996a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
@@ -172,6 +172,7 @@ typedef enum {
ZPOOL_PROP_READONLY,
ZPOOL_PROP_COMMENT,
ZPOOL_PROP_EXPANDSZ,
+ ZPOOL_PROP_FREEING,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -245,6 +246,8 @@ const char *zpool_prop_to_name(zpool_prop_t);
const char *zpool_prop_default_string(zpool_prop_t);
uint64_t zpool_prop_default_numeric(zpool_prop_t);
boolean_t zpool_prop_readonly(zpool_prop_t);
+boolean_t zpool_prop_feature(const char *);
+boolean_t zpool_prop_unsupported(const char *name);
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
@@ -352,6 +355,7 @@ typedef enum {
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
#define SPA_VERSION_28 28ULL
+#define SPA_VERSION_5000 5000ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
@@ -359,8 +363,8 @@ typedef enum {
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
-#define SPA_VERSION SPA_VERSION_28
-#define SPA_VERSION_STRING "28"
+#define SPA_VERSION SPA_VERSION_5000
+#define SPA_VERSION_STRING "5000"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -411,6 +415,12 @@ typedef enum {
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
+#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28
+#define SPA_VERSION_FEATURES SPA_VERSION_5000
+
+#define SPA_VERSION_IS_SUPPORTED(v) \
+ (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
+ ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@@ -508,6 +518,11 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
+#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */
+#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */
+#define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */
+#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
+#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -586,6 +601,7 @@ typedef enum vdev_aux {
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
+ VDEV_AUX_UNSUP_FEAT, /* unsupported features */
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h b/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h
index abf84cf..3062dd9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_NVPAIR_H
@@ -274,6 +275,73 @@ int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
int nvpair_value_double(nvpair_t *, double *);
#endif
+nvlist_t *fnvlist_alloc(void);
+void fnvlist_free(nvlist_t *);
+size_t fnvlist_size(nvlist_t *);
+char *fnvlist_pack(nvlist_t *, size_t *);
+void fnvlist_pack_free(char *, size_t);
+nvlist_t *fnvlist_unpack(char *, size_t);
+nvlist_t *fnvlist_dup(nvlist_t *);
+void fnvlist_merge(nvlist_t *, nvlist_t *);
+
+void fnvlist_add_boolean(nvlist_t *, const char *);
+void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
+void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
+void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
+void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
+void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
+void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
+void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
+void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
+void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
+void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
+void fnvlist_add_string(nvlist_t *, const char *, const char *);
+void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
+void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
+void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
+void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
+void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
+void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
+void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
+void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
+void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
+void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
+void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
+void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
+void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
+void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
+
+void fnvlist_remove(nvlist_t *, const char *);
+void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
+
+nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name);
+boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name);
+boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name);
+uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name);
+int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name);
+int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name);
+int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name);
+int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name);
+uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name);
+uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name);
+uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name);
+uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name);
+char *fnvlist_lookup_string(nvlist_t *nvl, const char *name);
+nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name);
+
+boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
+uchar_t fnvpair_value_byte(nvpair_t *nvp);
+int8_t fnvpair_value_int8(nvpair_t *nvp);
+int16_t fnvpair_value_int16(nvpair_t *nvp);
+int32_t fnvpair_value_int32(nvpair_t *nvp);
+int64_t fnvpair_value_int64(nvpair_t *nvp);
+uint8_t fnvpair_value_uint8_t(nvpair_t *nvp);
+uint16_t fnvpair_value_uint16(nvpair_t *nvp);
+uint32_t fnvpair_value_uint32(nvpair_t *nvp);
+uint64_t fnvpair_value_uint64(nvpair_t *nvp);
+char *fnvpair_value_string(nvpair_t *nvp);
+nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
+
#ifdef __cplusplus
}
#endif
OpenPOWER on IntegriCloud