summaryrefslogtreecommitdiffstats
path: root/cddl
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
committerpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
commit1b03c5bf41222b723415638f03e00ed12cac076a (patch)
treeef515cadc08bf427e4d3f1360199ec9827b1596b /cddl
parentc67d387baf03726323703774b1b320235fb1f24b (diff)
downloadFreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.zip
FreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.tar.gz
Finally... Import the latest open-source ZFS version - (SPA) 28.
Few new things available from now on: - Data deduplication. - Triple parity RAIDZ (RAIDZ3). - zfs diff. - zpool split. - Snapshot holds. - zpool import -F. Allows to rewind corrupted pool to earlier transaction group. - Possibility to import pool in read-only mode. MFC after: 1 month
Diffstat (limited to 'cddl')
-rw-r--r--cddl/compat/opensolaris/include/fcntl.h3
-rw-r--r--cddl/compat/opensolaris/include/mnttab.h4
-rw-r--r--cddl/compat/opensolaris/include/priv.h2
-rw-r--r--cddl/compat/opensolaris/include/sha2.h38
-rw-r--r--cddl/compat/opensolaris/include/solaris.h6
-rw-r--r--cddl/compat/opensolaris/include/thread_pool.h39
-rw-r--r--cddl/compat/opensolaris/misc/fsshare.c14
-rw-r--r--cddl/compat/opensolaris/misc/zmount.c5
-rw-r--r--cddl/contrib/opensolaris/cmd/stat/common/statcommon.h50
-rw-r--r--cddl/contrib/opensolaris/cmd/stat/common/timestamp.c49
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb.c2241
-rw-r--r--cddl/contrib/opensolaris/cmd/zdb/zdb_il.c131
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs.8580
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c29
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h1
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_main.c3667
-rw-r--r--cddl/contrib/opensolaris/cmd/zfs/zfs_util.h6
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/translate.c55
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/zinject.c255
-rw-r--r--cddl/contrib/opensolaris/cmd/zinject/zinject.h9
-rw-r--r--cddl/contrib/opensolaris/cmd/zlook/zlook.c411
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool.8684
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_main.c977
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_util.c20
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_util.h8
-rw-r--r--cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c471
-rw-r--r--cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.167
-rw-r--r--cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c429
-rw-r--r--cddl/contrib/opensolaris/cmd/ztest/ztest.c4851
-rw-r--r--cddl/contrib/opensolaris/head/synch.h27
-rw-r--r--cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c783
-rw-r--r--cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h160
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h13
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c41
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c33
-rw-r--r--cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c56
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h211
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c94
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c22
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c1503
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c832
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c452
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h53
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c598
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c354
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c1533
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c1346
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c99
-rw-r--r--cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c185
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/kernel.c128
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h109
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/taskq.c49
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/util.c5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/allow.py16
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py37
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/holds.py75
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c117
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/table.py70
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py5
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py77
-rw-r--r--cddl/contrib/opensolaris/lib/pyzfs/common/util.py13
-rw-r--r--cddl/lib/libzfs/Makefile29
-rw-r--r--cddl/lib/libzpool/Makefile18
-rw-r--r--cddl/sbin/zfs/Makefile8
-rw-r--r--cddl/sbin/zpool/Makefile15
-rw-r--r--cddl/usr.bin/Makefile4
-rw-r--r--cddl/usr.bin/zlook/Makefile25
-rw-r--r--cddl/usr.bin/zstreamdump/Makefile27
-rw-r--r--cddl/usr.bin/ztest/Makefile1
-rw-r--r--cddl/usr.sbin/zdb/Makefile1
72 files changed, 18054 insertions, 6282 deletions
diff --git a/cddl/compat/opensolaris/include/fcntl.h b/cddl/compat/opensolaris/include/fcntl.h
index 9b6c3f9..548918a 100644
--- a/cddl/compat/opensolaris/include/fcntl.h
+++ b/cddl/compat/opensolaris/include/fcntl.h
@@ -32,6 +32,7 @@
#include_next <fcntl.h>
-#define open64 open
+#define open64(...) open(__VA_ARGS__)
+#define openat64(...) openat(__VA_ARGS__)
#endif
diff --git a/cddl/compat/opensolaris/include/mnttab.h b/cddl/compat/opensolaris/include/mnttab.h
index a18dd8d..227196a 100644
--- a/cddl/compat/opensolaris/include/mnttab.h
+++ b/cddl/compat/opensolaris/include/mnttab.h
@@ -12,6 +12,10 @@
#define MNTTAB _PATH_DEVZERO
#define MNT_LINE_MAX 1024
+#define MS_OVERLAY 0x0
+#define MS_NOMNTTAB 0x0
+#define MS_RDONLY 0x1
+
#define umount2(p, f) unmount(p, f)
struct mnttab {
diff --git a/cddl/compat/opensolaris/include/priv.h b/cddl/compat/opensolaris/include/priv.h
index 32696ae..2fee5b0 100644
--- a/cddl/compat/opensolaris/include/priv.h
+++ b/cddl/compat/opensolaris/include/priv.h
@@ -10,7 +10,7 @@
#define PRIV_SYS_CONFIG 0
static __inline int
-priv_ineffect(priv)
+priv_ineffect(int priv)
{
assert(priv == PRIV_SYS_CONFIG);
diff --git a/cddl/compat/opensolaris/include/sha2.h b/cddl/compat/opensolaris/include/sha2.h
new file mode 100644
index 0000000..488f2db
--- /dev/null
+++ b/cddl/compat/opensolaris/include/sha2.h
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SHA2_H_
+#define _OPENSOLARIS_SHA2_H_
+
+#include_next <sha256.h>
+
+/*
+ * Map the SHA256Init/SHA256Update/SHA256Final spellings onto
+ * SHA256_Init/SHA256_Update/SHA256_Final.  The Final wrapper also casts the
+ * digest buffer argument to unsigned char *.
+ */
+#define SHA256Init(c) SHA256_Init(c)
+#define SHA256Update(c, d, s) SHA256_Update((c), (d), (s))
+#define SHA256Final(b, c) SHA256_Final((unsigned char *)(b), (c))
+
+#endif /* !_OPENSOLARIS_SHA2_H_ */
diff --git a/cddl/compat/opensolaris/include/solaris.h b/cddl/compat/opensolaris/include/solaris.h
index 01f9d47..9bead01 100644
--- a/cddl/compat/opensolaris/include/solaris.h
+++ b/cddl/compat/opensolaris/include/solaris.h
@@ -5,6 +5,10 @@
#include <sys/ccompile.h>
-#define dirent64 dirent
+#include <fcntl.h>
+
+#define NOTE(s)
+
+int mkdirp(const char *, mode_t);
#endif /* !_SOLARIS_H_ */
diff --git a/cddl/compat/opensolaris/include/thread_pool.h b/cddl/compat/opensolaris/include/thread_pool.h
new file mode 100644
index 0000000..25ac55d
--- /dev/null
+++ b/cddl/compat/opensolaris/include/thread_pool.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_THREAD_POOL_H_
+#define _OPENSOLARIS_THREAD_POOL_H_
+
+/*
+ * Stand-in thread pool interface: no pool object is created and no threads
+ * are started.  tpool_create() evaluates to 0, tpool_dispatch() calls
+ * func(arg) directly in the caller, and tpool_wait()/tpool_destroy() expand
+ * to empty statements.
+ */
+typedef int tpool_t;
+
+#define tpool_create(a, b, c, d) (0)
+#define tpool_dispatch(pool, func, arg) func(arg)
+#define tpool_wait(pool) do { } while (0)
+#define tpool_destroy(pool) do { } while (0)
+
+#endif /* !_OPENSOLARIS_THREAD_POOL_H_ */
diff --git a/cddl/compat/opensolaris/misc/fsshare.c b/cddl/compat/opensolaris/misc/fsshare.c
index 10ed591..e8faa92 100644
--- a/cddl/compat/opensolaris/misc/fsshare.c
+++ b/cddl/compat/opensolaris/misc/fsshare.c
@@ -28,15 +28,17 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
+
+#include <assert.h>
#include <errno.h>
+#include <fcntl.h>
+#include <fsshare.h>
#include <libutil.h>
-#include <assert.h>
#include <pathnames.h> /* _PATH_MOUNTDPID */
-#include <fsshare.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
#define FILE_HEADER "# !!! DO NOT EDIT THIS FILE MANUALLY !!!\n\n"
#define OPTSSIZE 1024
diff --git a/cddl/compat/opensolaris/misc/zmount.c b/cddl/compat/opensolaris/misc/zmount.c
index 493a4fc..b4f99e3 100644
--- a/cddl/compat/opensolaris/misc/zmount.c
+++ b/cddl/compat/opensolaris/misc/zmount.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <mnttab.h>
static void
build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
@@ -78,7 +79,7 @@ zmount(const char *spec, const char *dir, int mflag, char *fstype,
assert(spec != NULL);
assert(dir != NULL);
- assert(mflag == 0);
+ assert(mflag == 0 || mflag == MS_RDONLY);
assert(fstype != NULL);
assert(strcmp(fstype, MNTTYPE_ZFS) == 0);
assert(dataptr == NULL);
@@ -91,6 +92,8 @@ zmount(const char *spec, const char *dir, int mflag, char *fstype,
iov = NULL;
iovlen = 0;
+ if (mflag & MS_RDONLY)
+ build_iovec(&iov, &iovlen, "ro", NULL, 0);
build_iovec(&iov, &iovlen, "fstype", fstype, (size_t)-1);
build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir),
(size_t)-1);
diff --git a/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h b/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
new file mode 100644
index 0000000..f82495f
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/stat/common/statcommon.h
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Common routines for acquiring snapshots of kstats for
+ * iostat, mpstat, and vmstat.
+ */
+
+#ifndef _STATCOMMON_H
+#define _STATCOMMON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <time.h>
+
+#define NODATE 0 /* Default: No time stamp */
+#define DDATE 1 /* Standard date format */
+#define UDATE 2 /* Internal representation of Unix time */
+
+/* Print a timestamp in either Unix or standard format. */
+void print_timestamp(uint_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STATCOMMON_H */
diff --git a/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c b/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
new file mode 100644
index 0000000..be7b30c
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/stat/common/timestamp.c
@@ -0,0 +1,49 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "statcommon.h"
+
+#include <langinfo.h>
+
+/*
+ * Print timestamp as decimal representation of time_t value (-T u was
+ * specified) or in date(1) format (-T d was specified).
+ *
+ * timestamp_fmt selects the format: UDATE prints the time_t value, DDATE
+ * prints the local time formatted with strftime("%+").  Any other value
+ * prints nothing.
+ */
+void
+print_timestamp(uint_t timestamp_fmt)
+{
+	time_t t = time(NULL);
+
+	if (timestamp_fmt == UDATE) {
+		/* time_t is not guaranteed to be long; cast to match "%ld". */
+		(void) printf("%ld\n", (long)t);
+	} else if (timestamp_fmt == DDATE) {
+		char dstr[64];
+		struct tm *tm = localtime(&t);
+
+		/*
+		 * localtime() may return NULL, and strftime() returns a
+		 * size_t that is 0 when nothing usable was stored in dstr.
+		 */
+		if (tm != NULL &&
+		    strftime(dstr, sizeof (dstr), "%+", tm) > 0)
+			(void) printf("%s\n", dstr);
+	}
+}
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index 915ea19..c6e219d 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <stdio.h>
@@ -34,6 +33,9 @@
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
@@ -51,10 +53,25 @@
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
+#include <sys/ddt.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
+#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
+ zio_compress_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
+ zio_checksum_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
+ dmu_ot[(idx)].ot_name : "UNKNOWN")
+#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
+
+#ifndef lint
+extern int zfs_recover;
+#else
+int zfs_recover;
+#endif
+
const char cmdname[] = "zdb";
uint8_t dump_opt[256];
@@ -64,8 +81,6 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
-boolean_t zdb_sig_user_data = B_TRUE;
-int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -87,39 +102,56 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-udibcsvL] [-U cachefile_path] [-t txg]\n"
- "\t [-S user:cksumalg] "
- "dataset [object...]\n"
- " %s -C [pool]\n"
- " %s -l dev\n"
- " %s -R pool:vdev:offset:size:flags\n"
- " %s [-p path_to_vdev_dir]\n"
- " %s -e pool | GUID | devid ...\n",
- cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
-
- (void) fprintf(stderr, " -u uberblock\n");
- (void) fprintf(stderr, " -d datasets\n");
- (void) fprintf(stderr, " -C cached pool configuration\n");
- (void) fprintf(stderr, " -i intent logs\n");
- (void) fprintf(stderr, " -b block statistics\n");
- (void) fprintf(stderr, " -m metaslabs\n");
- (void) fprintf(stderr, " -c checksum all metadata (twice for "
+ "Usage: %s [-CumdibcsDvhL] poolname [object...]\n"
+ " %s [-div] dataset [object...]\n"
+ " %s -m [-L] poolname [vdev [metaslab...]]\n"
+ " %s -R poolname vdev:offset:size[:flags]\n"
+ " %s -S poolname\n"
+ " %s -l [-u] device\n"
+ " %s -C\n\n",
+ cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
+
+ (void) fprintf(stderr, " Dataset name must include at least one "
+ "separator character '/' or '@'\n");
+ (void) fprintf(stderr, " If dataset name is specified, only that "
+ "dataset is dumped\n");
+ (void) fprintf(stderr, " If object numbers are specified, only "
+ "those objects are dumped\n\n");
+ (void) fprintf(stderr, " Options to control amount of output:\n");
+ (void) fprintf(stderr, " -u uberblock\n");
+ (void) fprintf(stderr, " -d dataset(s)\n");
+ (void) fprintf(stderr, " -i intent logs\n");
+ (void) fprintf(stderr, " -C config (or cachefile if alone)\n");
+ (void) fprintf(stderr, " -h pool history\n");
+ (void) fprintf(stderr, " -b block statistics\n");
+ (void) fprintf(stderr, " -m metaslabs\n");
+ (void) fprintf(stderr, " -c checksum all metadata (twice for "
"all data) blocks\n");
- (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
- (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
- "dump blkptr signatures\n");
- (void) fprintf(stderr, " -v verbose (applies to all others)\n");
+ (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
+ (void) fprintf(stderr, " -D dedup statistics\n");
+ (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
+ (void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
(void) fprintf(stderr, " -L disable leak tracking (do not "
"load spacemaps)\n");
- (void) fprintf(stderr, " -U cachefile_path -- use alternate "
- "cachefile\n");
(void) fprintf(stderr, " -R read and display block from a "
- "device\n");
- (void) fprintf(stderr, " -e Pool is exported/destroyed/"
- "has altroot\n");
- (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
- (void) fprintf(stderr, " -t <txg> highest txg to use when "
+ "device\n\n");
+ (void) fprintf(stderr, " Below options are intended for use "
+ "with other options (except -l):\n");
+ (void) fprintf(stderr, " -A ignore assertions (-A), enable "
+ "panic recovery (-AA) or both (-AAA)\n");
+ (void) fprintf(stderr, " -F attempt automatic rewind within "
+ "safe range of transaction groups\n");
+ (void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
+ "cachefile\n");
+ (void) fprintf(stderr, " -X attempt extreme rewind (does not "
+ "work with dataset)\n");
+ (void) fprintf(stderr, " -e pool is exported/destroyed/"
+ "has altroot/not in a cachefile\n");
+ (void) fprintf(stderr, " -p <path> -- use one or more with "
+ "-e to specify path to vdev dir\n");
+ (void) fprintf(stderr, " -P print numbers parsable\n");
+ (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
@@ -146,68 +178,6 @@ fatal(const char *fmt, ...)
exit(1);
}
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
- nvpair_t *elem = NULL;
-
- while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
-
- VERIFY(nvpair_value_string(elem, &value) == 0);
- (void) printf("%*s%s='%s'\n", indent, "",
- nvpair_name(elem), value);
- }
- break;
-
- case DATA_TYPE_UINT64:
- {
- uint64_t value;
-
- VERIFY(nvpair_value_uint64(elem, &value) == 0);
- (void) printf("%*s%s=%llu\n", indent, "",
- nvpair_name(elem), (u_longlong_t)value);
- }
- break;
-
- case DATA_TYPE_NVLIST:
- {
- nvlist_t *value;
-
- VERIFY(nvpair_value_nvlist(elem, &value) == 0);
- (void) printf("%*s%s\n", indent, "",
- nvpair_name(elem));
- dump_nvlist(value, indent + 4);
- }
- break;
-
- case DATA_TYPE_NVLIST_ARRAY:
- {
- nvlist_t **value;
- uint_t c, count;
-
- VERIFY(nvpair_value_nvlist_array(elem, &value,
- &count) == 0);
-
- for (c = 0; c < count; c++) {
- (void) printf("%*s%s[%u]\n", indent, "",
- nvpair_name(elem), c);
- dump_nvlist(value[c], indent + 8);
- }
- }
- break;
-
- default:
-
- (void) printf("bad config type %d for %s\n",
- nvpair_type(elem), nvpair_name(elem));
- }
- }
-}
-
/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
@@ -227,6 +197,15 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
nvlist_free(nv);
}
+/*
+ * Format num into buf: as a plain decimal number when the -P option
+ * (dump_opt['P']) is set, otherwise through nicenum().
+ *
+ * No buffer length is passed, so buf must be large enough for a 64-bit
+ * decimal value plus the terminating NUL.
+ */
+static void
+zdb_nicenum(uint64_t num, char *buf)
+{
+	if (dump_opt['P'])
+		/* "%llu" takes an unsigned long long, so cast accordingly. */
+		(void) sprintf(buf, "%llu", (u_longlong_t)num);
+	else
+		nicenum(num, buf);
+}
+
const char dump_zap_stars[] = "****************************************";
const int dump_zap_width = sizeof (dump_zap_stars) - 1;
@@ -325,6 +304,13 @@ dump_none(objset_t *os, uint64_t object, void *data, size_t size)
}
/*ARGSUSED*/
+/*
+ * Print a fixed "UNKNOWN OBJECT TYPE" line.  None of the arguments are used.
+ */
+static void
+dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ (void) printf("\tUNKNOWN OBJECT TYPE\n");
+}
+
+/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
@@ -388,6 +374,79 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
/*ARGSUSED*/
+/*
+ * Print only the ZAP statistics of the given object via dump_zap_stats();
+ * the entries themselves are not dumped here.  data and size are unused.
+ */
static void
+dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ dump_zap_stats(os, object);
+ /* contents are printed elsewhere, properly decoded */
+}
+
+/*
+ * Print the ZAP statistics of the object, then one line per ZAP entry.
+ * Each line starts with the entry name; entries that hold at least one
+ * integer also show the first integer in hex followed by its
+ * ATTR_LENGTH, ATTR_BSWAP and ATTR_NUM fields.
+ */
+/*ARGSUSED*/
+static void
+dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t cursor;
+	zap_attribute_t entry;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	zap_cursor_init(&cursor, os, object);
+	while (zap_cursor_retrieve(&cursor, &entry) == 0) {
+		(void) printf("\t\t%s = ", entry.za_name);
+
+		if (entry.za_num_integers != 0) {
+			(void) printf(" %llx : [%d:%d:%d]\n",
+			    (u_longlong_t)entry.za_first_integer,
+			    (int)ATTR_LENGTH(entry.za_first_integer),
+			    (int)ATTR_BSWAP(entry.za_first_integer),
+			    (int)ATTR_NUM(entry.za_first_integer));
+		} else {
+			/* No value stored: just terminate the line. */
+			(void) printf("\n");
+		}
+
+		zap_cursor_advance(&cursor);
+	}
+	zap_cursor_fini(&cursor);
+}
+
+/*
+ * Print the ZAP statistics of the object, then one line per ZAP entry of the
+ * form "name = [ a  b ... ]", where the listed numbers are the entry's
+ * 2-byte integers fetched with zap_lookup().
+ */
+/*ARGSUSED*/
+static void
+dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t cursor;
+	zap_attribute_t entry;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	zap_cursor_init(&cursor, os, object);
+	while (zap_cursor_retrieve(&cursor, &entry) == 0) {
+		(void) printf("\t\t%s = [", entry.za_name);
+
+		if (entry.za_num_integers == 0) {
+			/* Nothing to list for this entry; end the line. */
+			(void) printf("\n");
+		} else {
+			uint16_t *ids;
+			int n;
+
+			/* The values are read into a uint16_t array below. */
+			VERIFY(entry.za_integer_length == 2);
+			ids = umem_zalloc(entry.za_num_integers *
+			    entry.za_integer_length, UMEM_NOFAIL);
+
+			VERIFY(zap_lookup(os, object, entry.za_name,
+			    entry.za_integer_length,
+			    entry.za_num_integers, ids) == 0);
+
+			for (n = 0; n != entry.za_num_integers; n++)
+				(void) printf(" %d ", (int)ids[n]);
+			(void) printf("]\n");
+
+			umem_free(ids,
+			    entry.za_num_integers * entry.za_integer_length);
+		}
+
+		zap_cursor_advance(&cursor);
+	}
+	zap_cursor_fini(&cursor);
+}
+
+/*ARGSUSED*/
+static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
@@ -441,17 +500,17 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
*/
alloc = 0;
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
- VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+ VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
sizeof (entry), &entry, DMU_READ_PREFETCH));
if (SM_DEBUG_DECODE(entry)) {
- (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
+ (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
(u_longlong_t)(offset / sizeof (entry)),
ddata[SM_DEBUG_ACTION_DECODE(entry)],
(u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
(u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
} else {
- (void) printf("\t\t[%4llu] %c range:"
- " %08llx-%08llx size: %06llx\n",
+ (void) printf("\t [%6llu] %c range:"
+ " %010llx-%010llx size: %06llx\n",
(u_longlong_t)(offset / sizeof (entry)),
SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
(u_longlong_t)((SM_OFFSET_DECODE(entry) <<
@@ -476,14 +535,14 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
static void
dump_metaslab_stats(metaslab_t *msp)
{
- char maxbuf[5];
+ char maxbuf[32];
space_map_t *sm = &msp->ms_map;
avl_tree_t *t = sm->sm_pp_root;
int free_pct = sm->sm_space * 100 / sm->sm_size;
- nicenum(space_map_maxsize(sm), maxbuf);
+ zdb_nicenum(space_map_maxsize(sm), maxbuf);
- (void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n",
+ (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
"segments", avl_numnodes(t), "maxsize", maxbuf,
"freepct", free_pct);
}
@@ -495,16 +554,16 @@ dump_metaslab(metaslab_t *msp)
spa_t *spa = vd->vdev_spa;
space_map_t *sm = &msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
- char freebuf[5];
+ char freebuf[32];
- nicenum(sm->sm_size - smo->smo_alloc, freebuf);
+ zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
(void) printf(
- "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
+ "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
(u_longlong_t)(sm->sm_start / sm->sm_size),
(u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
- if (dump_opt['m'] > 1) {
+ if (dump_opt['m'] > 1 && !dump_opt['L']) {
mutex_enter(&msp->ms_lock);
space_map_load_wait(sm);
if (!sm->sm_loaded)
@@ -525,22 +584,52 @@ dump_metaslab(metaslab_t *msp)
}
+/*
+ * Print the per-vdev heading of the metaslab listing: the vdev id, the
+ * metaslab count with the column titles, and a row of dashes underneath.
+ */
static void
+print_vdev_metaslab_header(vdev_t *vd)
+{
+	u_longlong_t id = (u_longlong_t)vd->vdev_id;
+	u_longlong_t ms_count = (u_longlong_t)vd->vdev_ms_count;
+
+	(void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
+	    id, "metaslabs", ms_count, "offset", "spacemap", "free");
+
+	(void) printf("\t%15s %19s %15s %10s\n",
+	    "---------------", "-------------------",
+	    "---------------", "-------------");
+}
+
+static void
dump_metaslabs(spa_t *spa)
{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m;
+ vdev_t *vd, *rvd = spa->spa_root_vdev;
+ uint64_t m, c = 0, children = rvd->vdev_children;
(void) printf("\nMetaslabs:\n");
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
+ if (!dump_opt['d'] && zopt_objects > 0) {
+ c = zopt_object[0];
+
+ if (c >= children)
+ (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
- (void) printf("\t%-10s %-19s %-15s %-10s\n",
- "vdev", "offset", "spacemap", "free");
- (void) printf("\t%10s %19s %15s %10s\n",
- "----------", "-------------------",
- "---------------", "-------------");
+ if (zopt_objects > 1) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
+
+ for (m = 1; m < zopt_objects; m++) {
+ if (zopt_object[m] < vd->vdev_ms_count)
+ dump_metaslab(
+ vd->vdev_ms[zopt_object[m]]);
+ else
+ (void) fprintf(stderr, "bad metaslab "
+ "number %llu\n",
+ (u_longlong_t)zopt_object[m]);
+ }
+ (void) printf("\n");
+ return;
+ }
+ children = c + 1;
+ }
+ for (; c < children; c++) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
for (m = 0; m < vd->vdev_ms_count; m++)
dump_metaslab(vd->vdev_ms[m]);
@@ -549,6 +638,133 @@ dump_metaslabs(spa_t *spa)
}
+/*
+ * Print one line for every physical variant of a dedup-table entry whose
+ * ddp_phys_birth is non-zero: the walk index, the reference count, the
+ * variant label and the block pointer rebuilt with ddt_bp_create().
+ */
static void
+dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
+{
+	const ddt_phys_t *phys = dde->dde_phys;
+	const ddt_key_t *key = &dde->dde_key;
+	char *types[4] = { "ditto", "single", "double", "triple" };
+	char blkbuf[BP_SPRINTF_LEN];
+	blkptr_t blk;
+
+	for (int p = 0; p < DDT_PHYS_TYPES; p++) {
+		const ddt_phys_t *ddp = &phys[p];
+
+		/* Variants with a zero birth are skipped. */
+		if (ddp->ddp_phys_birth != 0) {
+			ddt_bp_create(ddt->ddt_checksum, key, ddp, &blk);
+			sprintf_blkptr(blkbuf, &blk);
+			(void) printf("index %llx refcnt %llu %s %s\n",
+			    (u_longlong_t)index,
+			    (u_longlong_t)ddp->ddp_refcnt,
+			    types[p], blkbuf);
+		}
+	}
+}
+
+/*
+ * Print the dedup, compress and copies ratios derived from the given DDT
+ * statistics, plus the combined dedup * compress / copies figure.
+ * Prints nothing when dds_blocks is zero.
+ */
+static void
+dump_dedup_ratio(const ddt_stat_t *dds)
+{
+	if (dds->dds_blocks == 0)
+		return;
+
+	double ref_lsize = (double)dds->dds_ref_lsize;
+	double ref_psize = (double)dds->dds_ref_psize;
+	double ref_dsize = (double)dds->dds_ref_dsize;
+	double dsize = (double)dds->dds_dsize;
+
+	double dedup = ref_dsize / dsize;
+	double compress = ref_lsize / ref_psize;
+	double copies = ref_dsize / ref_psize;
+
+	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
+	    "dedup * compress / copies = %.2f\n\n",
+	    dedup, compress, copies, dedup * compress / copies);
+}
+
+/*
+ * Dump the DDT object selected by (type, class).
+ *
+ * Nothing is printed when ddt_object_info() reports ENOENT or when the
+ * object holds no entries.  Otherwise a one-line summary is always printed,
+ * and the -D count (dump_opt['D']) controls how much more follows:
+ * 3 or more adds the histogram for this type/class, 4 or more adds the
+ * individual entries except for DDT_CLASS_UNIQUE, and 5 or more adds the
+ * DDT_CLASS_UNIQUE entries as well.
+ */
+static void
+dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+{
+ char name[DDT_NAMELEN];
+ ddt_entry_t dde;
+ uint64_t walk = 0;
+ dmu_object_info_t doi;
+ uint64_t count, dspace, mspace;
+ int error;
+
+ error = ddt_object_info(ddt, type, class, &doi);
+
+ if (error == ENOENT)
+ return;
+ ASSERT(error == 0);
+
+ if ((count = ddt_object_count(ddt, type, class)) == 0)
+ return;
+
+ /* 512-byte block count converted to bytes */
+ dspace = doi.doi_physical_blocks_512 << 9;
+ mspace = doi.doi_fill_count * doi.doi_data_block_size;
+
+ ddt_object_name(ddt, type, class, name);
+
+ /* the two sizes are per-entry averages (totals divided by count) */
+ (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
+ name,
+ (u_longlong_t)count,
+ (u_longlong_t)(dspace / count),
+ (u_longlong_t)(mspace / count));
+
+ if (dump_opt['D'] < 3)
+ return;
+
+ zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
+
+ if (dump_opt['D'] < 4)
+ return;
+
+ if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
+ return;
+
+ (void) printf("%s contents:\n\n", name);
+
+ /* walk every entry; the walk is expected to end with ENOENT */
+ while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
+ dump_dde(ddt, &dde, walk);
+
+ ASSERT(error == ENOENT);
+
+ (void) printf("\n");
+}
+
+/*
+ * Dump every DDT of the pool (each checksum function, type and class) with
+ * dump_ddt(), then print the pool-wide totals: the aggregated histogram when
+ * -D was given more than once, followed by the dedup ratios.  If the totals
+ * report no blocks, only "All DDTs are empty" is printed.
+ */
+static void
+dump_all_ddts(spa_t *spa)
+{
+	ddt_histogram_t ddh_total = { 0 };
+	ddt_stat_t dds_total = { 0 };
+
+	for (enum zio_checksum cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS;
+	    cksum++) {
+		ddt_t *ddt = spa->spa_ddt[cksum];
+
+		for (enum ddt_type type = 0; type < DDT_TYPES; type++)
+			for (enum ddt_class class = 0; class < DDT_CLASSES;
+			    class++)
+				dump_ddt(ddt, type, class);
+	}
+
+	ddt_get_dedup_stats(spa, &dds_total);
+
+	if (dds_total.dds_blocks != 0) {
+		(void) printf("\n");
+
+		if (dump_opt['D'] > 1) {
+			(void) printf("DDT histogram "
+			    "(aggregated over all DDTs):\n");
+			ddt_get_dedup_histogram(spa, &ddh_total);
+			zpool_dump_ddt(&dds_total, &ddh_total);
+		}
+
+		dump_dedup_ratio(&dds_total);
+		return;
+	}
+
+	(void) printf("All DDTs are empty\n");
+}
+
+static void
dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
{
char *prefix = (void *)sm;
@@ -568,7 +784,7 @@ dump_dtl(vdev_t *vd, int indent)
char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
char prefix[256];
- spa_vdev_state_enter(spa);
+ spa_vdev_state_enter(spa, SCL_NONE);
required = vdev_dtl_required(vd);
(void) spa_vdev_state_exit(spa, NULL, 0);
@@ -598,6 +814,68 @@ dump_dtl(vdev_t *vd, int indent)
dump_dtl(vd->vdev_child[c], indent + 4);
}
+/*
+ * Print the pool's history log to stdout.
+ *
+ * The log is fetched with spa_history_get() one buffer at a time and each
+ * buffer is turned into nvlist records by zpool_history_unpack().  After
+ * every successful unpack the read offset is moved back by 'resid'
+ * (presumably the bytes of a trailing, not yet complete record -- confirm
+ * against zpool_history_unpack()).  Reading stops when a read returns no
+ * data or when unpacking fails; a failed read aborts with a message on
+ * stderr and prints nothing.
+ *
+ * Each record is printed as "<timestamp> <text>".  Records without
+ * ZPOOL_HIST_TIME are skipped.  The text is the ZPOOL_HIST_CMD string when
+ * present; otherwise the record is treated as an internal event and is
+ * formatted from its event number, txg and internal string.  Internal
+ * records lacking an event number, or whose number is not below LOG_END,
+ * are skipped.
+ *
+ * NOTE(review): 'events' and the nvlists it points to are not released
+ * here.
+ */
+static void
+dump_history(spa_t *spa)
+{
+	nvlist_t **events = NULL;
+	char buf[SPA_MAXBLOCKSIZE];
+	uint64_t resid, len, off = 0;
+	uint_t num = 0;
+	int error;
+	time_t tsec;
+	struct tm t;
+	char tbuf[30];
+	char internalstr[MAXPATHLEN];
+
+	do {
+		len = sizeof (buf);
+
+		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
+			(void) fprintf(stderr, "Unable to read history: "
+			    "error %d\n", error);
+			return;
+		}
+
+		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
+			break;
+
+		off -= resid;
+	} while (len != 0);
+
+	(void) printf("\nHistory:\n");
+	/* 'num' is unsigned, so the index is unsigned as well. */
+	for (uint_t i = 0; i < num; i++) {
+		uint64_t time, txg, ievent;
+		char *cmd, *intstr;
+
+		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
+		    &time) != 0)
+			continue;
+		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
+		    &cmd) != 0) {
+			/* No command string: try the internal-event form. */
+			if (nvlist_lookup_uint64(events[i],
+			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
+				continue;
+			verify(nvlist_lookup_uint64(events[i],
+			    ZPOOL_HIST_TXG, &txg) == 0);
+			verify(nvlist_lookup_string(events[i],
+			    ZPOOL_HIST_INT_STR, &intstr) == 0);
+			/* Guard the index into zfs_history_event_names[]. */
+			if (ievent >= LOG_END)
+				continue;
+
+			/*
+			 * txg is a uint64_t; cast it so the argument type
+			 * matches the %lld conversion.
+			 */
+			(void) snprintf(internalstr,
+			    sizeof (internalstr),
+			    "[internal %s txg:%lld] %s",
+			    zfs_history_event_names[ievent],
+			    (longlong_t)txg, intstr);
+			cmd = internalstr;
+		}
+		tsec = time;
+		(void) localtime_r(&tsec, &t);
+		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+		(void) printf("%s %s\n", tbuf, cmd);
+	}
+}
+
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
@@ -605,35 +883,48 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
}
+/*
+ * Turn the block id held in bookmark 'zb' into an offset for display.
+ *
+ * Without a dnode (dnp == NULL) the bookmark's level must be negative:
+ * the block id is returned unchanged for object 0 and multiplied by the
+ * logical size of 'bp' for any other object.
+ *
+ * With a dnode the level must be non-negative: the block id is shifted
+ * left by level * (dn_indblkshift - SPA_BLKPTRSHIFT), multiplied by
+ * dn_datablkszsec and shifted left by SPA_MINBLOCKSHIFT.
+ */
static uint64_t
-blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
+blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
{
- if (level < 0)
- return (blkid);
+ if (dnp == NULL) {
+ ASSERT(zb->zb_level < 0);
+ if (zb->zb_object == 0)
+ return (zb->zb_blkid);
+ return (zb->zb_blkid * BP_GET_LSIZE(bp));
+ }
- return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
+ ASSERT(zb->zb_level >= 0);
+
+ return ((zb->zb_blkid <<
+ (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}
+/*
+ * Format block pointer 'bp' into 'blkbuf' in a compact one-line form.
+ *
+ * When the -b level is 5 or more the full sprintf_blkptr() form is used
+ * instead.  Otherwise the output is one "vdev:offset:asize " triple per
+ * DVA (all DVAs when the -d level is above 5, else only the first),
+ * followed by logical/physical size, fill count and the two birth values.
+ *
+ * No buffer length is passed in, so every write here is unbounded; the
+ * visible callers supply a BP_SPRINTF_LEN-sized buffer.
+ */
static void
-sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
+sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
{
- dva_t *dva = bp->blk_dva;
- int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
- int i;
+ const dva_t *dva = bp->blk_dva;
+ int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
+
+ if (dump_opt['b'] >= 5) {
+ sprintf_blkptr(blkbuf, bp);
+ return;
+ }
blkbuf[0] = '\0';
- for (i = 0; i < ndvas; i++)
+ for (int i = 0; i < ndvas; i++)
(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
(u_longlong_t)DVA_GET_VDEV(&dva[i]),
(u_longlong_t)DVA_GET_OFFSET(&dva[i]),
(u_longlong_t)DVA_GET_ASIZE(&dva[i]));
- (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
+ (void) sprintf(blkbuf + strlen(blkbuf),
+ "%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_birth);
+ (u_longlong_t)bp->blk_birth,
+ (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
static void
@@ -646,8 +937,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
- (void) printf("%16llx ",
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
+ (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
ASSERT(zb->zb_level >= 0);
@@ -659,23 +949,15 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
}
}
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+ sprintf_blkptr_compact(blkbuf, bp);
(void) printf("%s\n", blkbuf);
}
-#define SET_BOOKMARK(zb, objset, object, level, blkid) \
-{ \
- (zb)->zb_objset = objset; \
- (zb)->zb_object = object; \
- (zb)->zb_level = level; \
- (zb)->zb_blkid = blkid; \
-}
-
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
blkptr_t *bp, const zbookmark_t *zb)
{
- int err;
+ int err = 0;
if (bp->blk_birth == 0)
return (0);
@@ -694,6 +976,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
return (err);
+ ASSERT(buf->b_data);
/* recursively visit blocks below this */
cbp = buf->b_data;
@@ -726,11 +1009,11 @@ dump_indirect(dnode_t *dn)
(void) printf("Indirect blocks:\n");
- SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+ SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
dn->dn_object, dnp->dn_nlevels - 1, 0);
for (j = 0; j < dnp->dn_nblkptr; j++) {
czb.zb_blkid = j;
- (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+ (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
&dnp->dn_blkptr[j], &czb);
}
@@ -743,7 +1026,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dir_phys_t *dd = data;
time_t crtime;
- char nice[6];
+ char nice[32];
if (dd == NULL)
return;
@@ -760,15 +1043,15 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_origin_obj);
(void) printf("\t\tchild_dir_zapobj = %llu\n",
(u_longlong_t)dd->dd_child_dir_zapobj);
- nicenum(dd->dd_used_bytes, nice);
+ zdb_nicenum(dd->dd_used_bytes, nice);
(void) printf("\t\tused_bytes = %s\n", nice);
- nicenum(dd->dd_compressed_bytes, nice);
+ zdb_nicenum(dd->dd_compressed_bytes, nice);
(void) printf("\t\tcompressed_bytes = %s\n", nice);
- nicenum(dd->dd_uncompressed_bytes, nice);
+ zdb_nicenum(dd->dd_uncompressed_bytes, nice);
(void) printf("\t\tuncompressed_bytes = %s\n", nice);
- nicenum(dd->dd_quota, nice);
+ zdb_nicenum(dd->dd_quota, nice);
(void) printf("\t\tquota = %s\n", nice);
- nicenum(dd->dd_reserved, nice);
+ zdb_nicenum(dd->dd_reserved, nice);
(void) printf("\t\treserved = %s\n", nice);
(void) printf("\t\tprops_zapobj = %llu\n",
(u_longlong_t)dd->dd_props_zapobj);
@@ -778,7 +1061,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_flags);
#define DO(which) \
- nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+ zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
DO(HEAD);
DO(SNAP);
@@ -794,7 +1077,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dataset_phys_t *ds = data;
time_t crtime;
- char used[6], compressed[6], uncompressed[6], unique[6];
+ char used[32], compressed[32], uncompressed[32], unique[32];
char blkbuf[BP_SPRINTF_LEN];
if (ds == NULL)
@@ -802,11 +1085,11 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
ASSERT(size == sizeof (*ds));
crtime = ds->ds_creation_time;
- nicenum(ds->ds_used_bytes, used);
- nicenum(ds->ds_compressed_bytes, compressed);
- nicenum(ds->ds_uncompressed_bytes, uncompressed);
- nicenum(ds->ds_unique_bytes, unique);
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
+ zdb_nicenum(ds->ds_used_bytes, used);
+ zdb_nicenum(ds->ds_compressed_bytes, compressed);
+ zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
+ zdb_nicenum(ds->ds_unique_bytes, unique);
+ sprintf_blkptr(blkbuf, &ds->ds_bp);
(void) printf("\t\tdir_obj = %llu\n",
(u_longlong_t)ds->ds_dir_obj);
@@ -820,6 +1103,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)ds->ds_snapnames_zapobj);
(void) printf("\t\tnum_children = %llu\n",
(u_longlong_t)ds->ds_num_children);
+ (void) printf("\t\tuserrefs_obj = %llu\n",
+ (u_longlong_t)ds->ds_userrefs_obj);
(void) printf("\t\tcreation_time = %s", ctime(&crtime));
(void) printf("\t\tcreation_txg = %llu\n",
(u_longlong_t)ds->ds_creation_txg);
@@ -842,63 +1127,88 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(void) printf("\t\tbp = %s\n", blkbuf);
}
+/*
+ * Per-block-pointer callback handed to bpobj_iterate_nofree() by
+ * dump_bpobj(): prints 'bp' in compact form on a tab-indented line.
+ * 'arg' and 'tx' are not used.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	char text[BP_SPRINTF_LEN];
+
+	ASSERT(bp->blk_birth != 0);
+
+	sprintf_blkptr_compact(text, bp);
+	(void) printf("\t%s\n", text);
+
+	return (0);
+}
+
+/*
+ * Print a summary of block pointer object 'bpo' under the label 'name'.
+ *
+ * Nothing is printed below -d level 3.  The summary shows the number of
+ * block pointers and the byte total; when the object has sub-objects
+ * (bpo_havesubobj) it also shows the sub-object count and the
+ * compressed/uncompressed totals.  From -d level 5 every block pointer is
+ * listed as well, through dump_bpobj_cb().
+ */
static void
-dump_bplist(objset_t *mos, uint64_t object, char *name)
+dump_bpobj(bpobj_t *bpo, char *name)
{
- bplist_t bpl = { 0 };
- blkptr_t blk, *bp = &blk;
- uint64_t itor = 0;
- char bytes[6];
- char comp[6];
- char uncomp[6];
+ char bytes[32];
+ char comp[32];
+ char uncomp[32];
if (dump_opt['d'] < 3)
return;
- mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
- VERIFY(0 == bplist_open(&bpl, mos, object));
- if (bplist_empty(&bpl)) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
- return;
- }
-
- nicenum(bpl.bpl_phys->bpl_bytes, bytes);
- if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
- nicenum(bpl.bpl_phys->bpl_comp, comp);
- nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
- (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+ zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
+ if (bpo->bpo_havesubobj) {
+ zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
+ zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
+ (void) printf("\n %s: %llu local blkptrs, %llu subobjs, "
+ "%s (%s/%s comp)\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+ (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
bytes, comp, uncomp);
} else {
- (void) printf("\n %s: %llu entries, %s\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+ (void) printf("\n %s: %llu blkptrs, %s\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes);
}
- if (dump_opt['d'] < 5) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+ if (dump_opt['d'] < 5)
return;
- }
(void) printf("\n");
- while (bplist_iterate(&bpl, &itor, bp) == 0) {
- char blkbuf[BP_SPRINTF_LEN];
+ /* The iteration result is deliberately ignored. */
+ (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
+}
- ASSERT(bp->blk_birth != 0);
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
- (void) printf("\tItem %3llu: %s\n",
- (u_longlong_t)itor - 1, blkbuf);
- }
+/*
+ * Print a summary of deadlist 'dl'.
+ *
+ * Nothing is printed below -d level 3.  At level 3 the used, compressed
+ * and uncompressed totals from dl_phys are shown.  From level 4 each
+ * entry of dl_tree is listed with its mintxg and the object number of its
+ * bpobj, and from level 5 each entry's bpobj is dumped too (with an empty
+ * label).
+ */
+static void
+dump_deadlist(dsl_deadlist_t *dl)
+{
+ dsl_deadlist_entry_t *dle;
+ char bytes[32];
+ char comp[32];
+ char uncomp[32];
+
+ if (dump_opt['d'] < 3)
+ return;
+
+ zdb_nicenum(dl->dl_phys->dl_used, bytes);
+ zdb_nicenum(dl->dl_phys->dl_comp, comp);
+ zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
+ (void) printf("\n Deadlist: %s (%s/%s comp)\n",
+ bytes, comp, uncomp);
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+ if (dump_opt['d'] < 4)
+ return;
+
+ (void) printf("\n");
+
+ for (dle = avl_first(&dl->dl_tree); dle;
+ dle = AVL_NEXT(&dl->dl_tree, dle)) {
+ /* %llu takes an unsigned argument; cast accordingly. */
+ (void) printf(" mintxg %llu -> obj %llu\n",
+ (u_longlong_t)dle->dle_mintxg,
+ (u_longlong_t)dle->dle_bpobj.bpo_object);
+
+ if (dump_opt['d'] >= 5)
+ dump_bpobj(&dle->dle_bpobj, "");
+ }
}
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
+static boolean_t sa_loaded;
+sa_attr_type_t *sa_attr_table;
static void
fuid_table_destroy()
@@ -931,12 +1241,12 @@ print_idstr(uint64_t id, const char *id_type)
}
static void
-dump_uidgid(objset_t *os, znode_phys_t *zp)
+dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
{
uint32_t uid_idx, gid_idx;
- uid_idx = FUID_INDEX(zp->zp_uid);
- gid_idx = FUID_INDEX(zp->zp_gid);
+ uid_idx = FUID_INDEX(uid);
+ gid_idx = FUID_INDEX(gid);
/* Load domain table, if not already loaded */
if (!fuid_table_loaded && (uid_idx || gid_idx)) {
@@ -951,50 +1261,111 @@ dump_uidgid(objset_t *os, znode_phys_t *zp)
fuid_table_loaded = B_TRUE;
}
- print_idstr(zp->zp_uid, "uid");
- print_idstr(zp->zp_gid, "gid");
+ print_idstr(uid, "uid");
+ print_idstr(gid, "gid");
}
+/*
+ * Object viewer for znodes: print the attributes of 'object' in 'os'.
+ * The 'data' and 'size' arguments are not used.
+ *
+ * The first call after sa_loaded has been cleared sets up the attribute
+ * table with sa_setup(); the attribute-registration object is looked up
+ * only when the ZPL version is at least ZPL_VERSION_SA.  The attributes
+ * are then fetched through a private SA handle with one bulk lookup.
+ *
+ * Below -d level 3 only the path is printed.  Otherwise the path, owner,
+ * timestamps, generation, mode, size, parent, link count and flags are
+ * printed; xattr and rdev are printed only when their individual lookups
+ * succeed.  The function returns without printing attributes if the
+ * handle cannot be obtained or the bulk lookup fails.
+ */
/*ARGSUSED*/
static void
dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
{
- znode_phys_t *zp = data;
- time_t z_crtime, z_atime, z_mtime, z_ctime;
char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
+ sa_handle_t *hdl;
+ uint64_t xattr, rdev, gen;
+ uint64_t uid, gid, mode, fsize, parent, links;
+ uint64_t pflags;
+ uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
+ time_t z_crtime, z_atime, z_mtime, z_ctime;
+ sa_bulk_attr_t bulk[12];
+ int idx = 0;
int error;
- ASSERT(size >= sizeof (znode_phys_t));
+ /* One-time attribute table setup, cached via sa_loaded. */
+ if (!sa_loaded) {
+ uint64_t sa_attrs = 0;
+ uint64_t version;
+
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+ 8, 1, &version) == 0);
+ if (version >= ZPL_VERSION_SA) {
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
+ 8, 1, &sa_attrs) == 0);
+ }
+ if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
+ ZPL_END, &sa_attr_table)) != 0) {
+ (void) printf("sa_setup failed errno %d, can't "
+ "display znode contents\n", error);
+ return;
+ }
+ sa_loaded = B_TRUE;
+ }
+
+ if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
+ (void) printf("Failed to get handle for SA znode\n");
+ return;
+ }
+
+ /* Twelve attributes are queued below, matching the size of bulk[]. */
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
+ &links, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
+ &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
+ NULL, &parent, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
+ &fsize, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
+ acctm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
+ modtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
+ crtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
+ chgtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
+ &pflags, 8);
+
+ if (sa_bulk_lookup(hdl, bulk, idx)) {
+ (void) sa_handle_destroy(hdl);
+ return;
+ }
error = zfs_obj_to_path(os, object, path, sizeof (path));
if (error != 0) {
(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
(u_longlong_t)object);
}
-
if (dump_opt['d'] < 3) {
(void) printf("\t%s\n", path);
+ (void) sa_handle_destroy(hdl);
return;
}
- z_crtime = (time_t)zp->zp_crtime[0];
- z_atime = (time_t)zp->zp_atime[0];
- z_mtime = (time_t)zp->zp_mtime[0];
- z_ctime = (time_t)zp->zp_ctime[0];
+ /* Only element [0] of each two-word timestamp is printed. */
+ z_crtime = (time_t)crtm[0];
+ z_atime = (time_t)acctm[0];
+ z_mtime = (time_t)modtm[0];
+ z_ctime = (time_t)chgtm[0];
(void) printf("\tpath %s\n", path);
- dump_uidgid(os, zp);
+ dump_uidgid(os, uid, gid);
(void) printf("\tatime %s", ctime(&z_atime));
(void) printf("\tmtime %s", ctime(&z_mtime));
(void) printf("\tctime %s", ctime(&z_ctime));
(void) printf("\tcrtime %s", ctime(&z_crtime));
- (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen);
- (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode);
- (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size);
- (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
- (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links);
- (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr);
- (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev);
+ (void) printf("\tgen %llu\n", (u_longlong_t)gen);
+ (void) printf("\tmode %llo\n", (u_longlong_t)mode);
+ (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
+ (void) printf("\tparent %llu\n", (u_longlong_t)parent);
+ (void) printf("\tlinks %llu\n", (u_longlong_t)links);
+ (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
+ /* xattr and rdev are looked up one by one; a miss is not an error. */
+ if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
+ if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
+ sa_handle_destroy(hdl);
}
/*ARGSUSED*/
@@ -1009,7 +1380,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
-static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
+static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_none, /* unallocated */
dump_zap, /* object directory */
dump_uint64, /* object array */
@@ -1051,6 +1422,20 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_zap, /* DSL scrub queue */
dump_zap, /* ZFS user/group used */
dump_zap, /* ZFS user/group quota */
+ dump_zap, /* snapshot refcount tags */
+ dump_ddt_zap, /* DDT ZAP object */
+ dump_zap, /* DDT statistics */
+ dump_znode, /* SA object */
+ dump_zap, /* SA Master Node */
+ dump_sa_attrs, /* SA attribute registration */
+ dump_sa_layouts, /* SA attribute layouts */
+ dump_zap, /* DSL scrub translations */
+ dump_none, /* fake dedup BP */
+ dump_zap, /* deadlist */
+ dump_none, /* deadlist hdr */
+ dump_zap, /* dsl clones */
+ dump_none, /* bpobj subobjs */
+ dump_unknown, /* Unknown type, must be last */
};
static void
@@ -1061,18 +1446,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
dnode_t *dn;
void *bonus = NULL;
size_t bsize = 0;
- char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
+ char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+ char bonus_size[32];
char aux[50];
int error;
if (*print_header) {
- (void) printf("\n Object lvl iblk dblk lsize"
- " asize type\n");
+ (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
+ "%full", "type");
*print_header = 0;
}
if (object == 0) {
- dn = os->os->os_meta_dnode;
+ dn = DMU_META_DNODE(os);
} else {
error = dmu_bonus_hold(os, object, FTAG, &db);
if (error)
@@ -1080,50 +1467,55 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
object, error);
bonus = db->db_data;
bsize = db->db_size;
- dn = ((dmu_buf_impl_t *)db)->db_dnode;
+ dn = DB_DNODE((dmu_buf_impl_t *)db);
}
dmu_object_info_from_dnode(dn, &doi);
- nicenum(doi.doi_metadata_block_size, iblk);
- nicenum(doi.doi_data_block_size, dblk);
- nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
- lsize);
- nicenum(doi.doi_physical_blks << 9, asize);
- nicenum(doi.doi_bonus_size, bonus_size);
+ zdb_nicenum(doi.doi_metadata_block_size, iblk);
+ zdb_nicenum(doi.doi_data_block_size, dblk);
+ zdb_nicenum(doi.doi_max_offset, lsize);
+ zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
+ zdb_nicenum(doi.doi_bonus_size, bonus_size);
+ (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
+ doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
+ doi.doi_max_offset);
aux[0] = '\0';
if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
- zio_checksum_table[doi.doi_checksum].ci_name);
+ ZDB_CHECKSUM_NAME(doi.doi_checksum));
}
if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
- zio_compress_table[doi.doi_compress].ci_name);
+ ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
- asize, dmu_ot[doi.doi_type].ot_name, aux);
+ (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
+ (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
+ asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
- (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
- "", "", "", "", bonus_size, "bonus",
- dmu_ot[doi.doi_bonus_type].ot_name);
+ (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "", "", "", "", "", bonus_size, "bonus",
+ ZDB_OT_NAME(doi.doi_bonus_type));
}
if (verbosity >= 4) {
- (void) printf("\tdnode flags: %s%s\n",
+ (void) printf("\tdnode flags: %s%s%s\n",
(dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
"USED_BYTES " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
- "USERUSED_ACCOUNTED " : "");
+ "USERUSED_ACCOUNTED " : "",
+ (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
+ "SPILL_BLKPTR" : "");
(void) printf("\tdnode maxblkid: %llu\n",
(longlong_t)dn->dn_phys->dn_maxblkid);
- object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
- object_viewer[doi.doi_type](os, object, NULL, 0);
+ object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
+ bonus, bsize);
+ object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
*print_header = 1;
}
@@ -1145,6 +1537,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
for (;;) {
+ char segsize[32];
error = dnode_next_offset(dn,
0, &start, minlvl, blkfill, 0);
if (error)
@@ -1152,7 +1545,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
end = start;
error = dnode_next_offset(dn,
DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
- nicenum(end - start, segsize);
+ zdb_nicenum(end - start, segsize);
(void) printf("\t\tsegment [%016llx, %016llx)"
" size %5s\n", (u_longlong_t)start,
(u_longlong_t)end, segsize);
@@ -1175,7 +1568,7 @@ dump_dir(objset_t *os)
dmu_objset_stats_t dds;
uint64_t object, object_count;
uint64_t refdbytes, usedobjs, scratch;
- char numbuf[8];
+ char numbuf[32];
char blkbuf[BP_SPRINTF_LEN + 20];
char osname[MAXNAMELEN];
char *type = "UNKNOWN";
@@ -1190,21 +1583,20 @@ dump_dir(objset_t *os)
if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
- usedobjs = os->os->os_rootbp->blk_fill;
- refdbytes = os->os->os_spa->spa_dsl_pool->
+ usedobjs = os->os_rootbp->blk_fill;
+ refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}
- ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+ ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
- nicenum(refdbytes, numbuf);
+ zdb_nicenum(refdbytes, numbuf);
if (verbosity >= 4) {
- (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
- (void) sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+ (void) sprintf(blkbuf, ", rootbp ");
+ (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
} else {
blkbuf[0] = '\0';
}
@@ -1217,18 +1609,6 @@ dump_dir(objset_t *os)
(u_longlong_t)dds.dds_creation_txg,
numbuf, (u_longlong_t)usedobjs, blkbuf);
- dump_intent_log(dmu_objset_zil(os));
-
- if (dmu_objset_ds(os) != NULL)
- dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
- dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
-
- if (verbosity < 2)
- return;
-
- if (os->os->os_rootbp->blk_birth == 0)
- return;
-
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
@@ -1237,10 +1617,22 @@ dump_dir(objset_t *os)
return;
}
+ if (dump_opt['i'] != 0 || verbosity >= 2)
+ dump_intent_log(dmu_objset_zil(os));
+
+ if (dmu_objset_ds(os) != NULL)
+ dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
+
+ if (verbosity < 2)
+ return;
+
+ if (os->os_rootbp->blk_birth == 0)
+ return;
+
dump_object(os, 0, verbosity, &print_header);
object_count = 0;
- if (os->os->os_userused_dnode &&
- os->os->os_userused_dnode->dn_type != 0) {
+ if (DMU_USERUSED_DNODE(os) != NULL &&
+ DMU_USERUSED_DNODE(os)->dn_type != 0) {
dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
}
@@ -1262,11 +1654,11 @@ dump_dir(objset_t *os)
}
static void
-dump_uberblock(uberblock_t *ub)
+dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
{
time_t timestamp = ub->ub_timestamp;
- (void) printf("Uberblock\n\n");
+ (void) printf(header ? header : "");
(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
@@ -1275,25 +1667,34 @@ dump_uberblock(uberblock_t *ub)
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
if (dump_opt['u'] >= 3) {
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
+ sprintf_blkptr(blkbuf, &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
}
- (void) printf("\n");
+ (void) printf(footer ? footer : "");
}
+/*
+ * Print the pool configuration stored in the MOS.
+ *
+ * The bonus buffer of spa_config_object is held, its first 64-bit word is
+ * read as the size of the packed nvlist, and the hold is released.  The
+ * size is then handed to dump_packed_nvlist(), which prints the
+ * configuration under a "MOS Configuration:" heading.  If the bonus
+ * buffer cannot be held, a one-line error goes to stderr instead.
+ *
+ * NOTE(review): nvsize is a size_t that is filled from a uint64_t and
+ * passed on by address; confirm the width dump_packed_nvlist() reads
+ * through that pointer on 32-bit builds.
+ */
static void
-dump_config(const char *pool)
+dump_config(spa_t *spa)
{
- spa_t *spa = NULL;
+ dmu_buf_t *db;
+ size_t nvsize = 0;
+ int error = 0;
+
+
+ error = dmu_bonus_hold(spa->spa_meta_objset,
+ spa->spa_config_object, FTAG, &db);
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (pool == NULL)
- (void) printf("%s\n", spa_name(spa));
- if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
- dump_nvlist(spa->spa_config, 4);
+ if (error == 0) {
+ nvsize = *(uint64_t *)db->db_data;
+ dmu_buf_rele(db, FTAG);
+
+ (void) printf("\nMOS Configuration:\n");
+ dump_packed_nvlist(spa->spa_meta_objset,
+ spa->spa_config_object, (void *)&nvsize, 1);
+ } else {
+ /* Terminate the message so later output starts on its own line. */
+ (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d\n",
+ (u_longlong_t)spa->spa_config_object, error);
}
- mutex_exit(&spa_namespace_lock);
}
static void
@@ -1342,41 +1743,75 @@ dump_cachefile(const char *cachefile)
nvlist_free(config);
}
+#define ZDB_MAX_UB_HEADER_SIZE 32
+
+/*
+ * Print every uberblock slot of label 'lbl' for which uberblock_verify()
+ * returns 0, each under an "Uberblock[<slot>]" header.
+ *
+ * A vdev_t on the stack, with only vdev_ashift and vdev_top filled in, is
+ * handed to the VDEV_UBERBLOCK_COUNT() and VDEV_UBERBLOCK_OFFSET() macros
+ * to obtain the slot count and the slot offsets for the given 'ashift'.
+ */
+static void
+dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
+{
+	char header[ZDB_MAX_UB_HEADER_SIZE];
+	vdev_t stub;
+	vdev_t *vdp = &stub;
+	int slot;
+
+	/* The stub is its own top-level vdev. */
+	vdp->vdev_ashift = ashift;
+	vdp->vdev_top = vdp;
+
+	for (slot = 0; slot < VDEV_UBERBLOCK_COUNT(vdp); slot++) {
+		uint64_t offset = VDEV_UBERBLOCK_OFFSET(vdp, slot);
+		uberblock_t *ub = (void *)((char *)lbl + offset);
+
+		if (uberblock_verify(ub) != 0)
+			continue;
+
+		(void) snprintf(header, sizeof (header),
+		    "Uberblock[%d]\n", slot);
+		dump_uberblock(ub, header, "");
+	}
+}
+
static void
dump_label(const char *dev)
{
int fd;
vdev_label_t label;
- char *buf = label.vl_vdev_phys.vp_nvlist;
+ char *path, *buf = label.vl_vdev_phys.vp_nvlist;
size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
struct stat64 statbuf;
- uint64_t psize;
- int l;
+ uint64_t psize, ashift;
+ int len = strlen(dev) + 1;
+
+ if (strncmp(dev, "/dev/dsk/", 9) == 0) {
+ len++;
+ path = malloc(len);
+ (void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
+ } else {
+ path = strdup(dev);
+ }
- if ((fd = open64(dev, O_RDONLY)) < 0) {
- (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
+ if ((fd = open64(path, O_RDONLY)) < 0) {
+ (void) printf("cannot open '%s': %s\n", path, strerror(errno));
+ free(path);
exit(1);
}
if (fstat64(fd, &statbuf) != 0) {
- (void) printf("failed to stat '%s': %s\n", dev,
+ (void) printf("failed to stat '%s': %s\n", path,
strerror(errno));
+ free(path);
+ (void) close(fd);
exit(1);
}
- if (S_ISCHR(statbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &statbuf.st_size) == -1) {
- (void) printf("failed to get size of '%s': %s\n", dev,
- strerror(errno));
- exit(1);
- }
+ if (S_ISBLK(statbuf.st_mode)) {
+ (void) printf("cannot use '%s': character device required\n",
+ path);
+ free(path);
+ (void) close(fd);
+ exit(1);
}
psize = statbuf.st_size;
psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
- for (l = 0; l < VDEV_LABELS; l++) {
-
+ for (int l = 0; l < VDEV_LABELS; l++) {
nvlist_t *config = NULL;
(void) printf("--------------------------------------------\n");
@@ -1391,105 +1826,47 @@ dump_label(const char *dev)
if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
(void) printf("failed to unpack label %d\n", l);
- continue;
+ ashift = SPA_MINBLOCKSHIFT;
+ } else {
+ nvlist_t *vdev_tree = NULL;
+
+ dump_nvlist(config, 4);
+ if ((nvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
+ (nvlist_lookup_uint64(vdev_tree,
+ ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
+ ashift = SPA_MINBLOCKSHIFT;
+ nvlist_free(config);
}
- dump_nvlist(config, 4);
- nvlist_free(config);
+ if (dump_opt['u'])
+ dump_label_uberblocks(&label, ashift);
}
+
+ free(path);
+ (void) close(fd);
}
+/*
+ * Dump the single dataset named 'dsname'; 'arg' is not used.
+ *
+ * The objset is owned with dmu_objset_own() (the B_TRUE argument is
+ * presumably the read-only flag -- confirm against its prototype), dumped
+ * with dump_dir() and disowned again.  Afterwards the FUID table is
+ * destroyed and sa_loaded is cleared.
+ *
+ * Returns 0 in every case, including when the dataset cannot be opened,
+ * in which case only an error line is printed.
+ */
/*ARGSUSED*/
static int
-dump_one_dir(char *dsname, void *arg)
+dump_one_dir(const char *dsname, void *arg)
{
int error;
objset_t *os;
- error = dmu_objset_open(dsname, DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error) {
- (void) printf("Could not open %s\n", dsname);
+ (void) printf("Could not open %s, error %d\n", dsname, error);
return (0);
}
dump_dir(os);
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
fuid_table_destroy();
+ /* Reset the per-dataset state cached by dump_znode(). */
+ sa_loaded = B_FALSE;
return (0);
}
-static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
-{
- vdev_t *vd = sm->sm_ppd;
-
- (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
- (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_load(space_map_t *sm)
-{
-}
-
-static void
-zdb_space_map_unload(space_map_t *sm)
-{
- space_map_vacate(sm, zdb_leak, sm);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
-}
-
-static space_map_ops_t zdb_space_map_ops = {
- zdb_space_map_load,
- zdb_space_map_unload,
- NULL, /* alloc */
- zdb_space_map_claim,
- NULL, /* free */
- NULL /* maxsize */
-};
-
-static void
-zdb_leak_init(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
- SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
- msp->ms_map.sm_ppd = vd;
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
-static void
-zdb_leak_fini(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
/*
- * Verify that the sum of the sizes of all blocks in the pool adds up
- * to the SPA's sa_alloc total.
+ * Block statistics.
*/
typedef struct zdb_blkstats {
uint64_t zb_asize;
@@ -1498,24 +1875,45 @@ typedef struct zdb_blkstats {
uint64_t zb_count;
} zdb_blkstats_t;
-#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_TOTAL DMU_OT_NUMTYPES
+/*
+ * Extended object types to report deferred frees and dedup auto-ditto blocks.
+ */
+#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
+#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
+#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2)
+
+static char *zdb_ot_extname[] = {
+ "deferred free",
+ "dedup ditto",
+ "Total",
+};
#define ZB_TOTAL DN_MAX_LEVELS
typedef struct zdb_cb {
- zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
+ zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
+ uint64_t zcb_dedup_asize;
+ uint64_t zcb_dedup_blocks;
uint64_t zcb_errors[256];
int zcb_readfails;
int zcb_haderrors;
+ spa_t *zcb_spa;
} zdb_cb_t;
static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
+zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
+ dmu_object_type_t type)
{
+ uint64_t refcnt = 0;
+
+ ASSERT(type < ZDB_OT_TOTAL);
+
+ if (zilog && zil_bp_tree_add(zilog, bp) != 0)
+ return;
+
for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
- int t = (i & 1) ? type : DMU_OT_TOTAL;
+ int t = (i & 1) ? type : ZDB_OT_TOTAL;
zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
zb->zb_asize += BP_GET_ASIZE(bp);
@@ -1524,127 +1922,258 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
zb->zb_count++;
}
- if (dump_opt['S']) {
- boolean_t print_sig;
-
- print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
- BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
-
- if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
- print_sig = B_FALSE;
-
- if (print_sig) {
- (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
- "%llx:%llx:%llx:%llx\n",
- (u_longlong_t)BP_GET_LEVEL(bp),
- (longlong_t)BP_GET_PSIZE(bp),
- (longlong_t)BP_GET_NDVAS(bp),
- dmu_ot[BP_GET_TYPE(bp)].ot_name,
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+ if (dump_opt['L'])
+ return;
+
+ if (BP_GET_DEDUP(bp)) {
+ ddt_t *ddt;
+ ddt_entry_t *dde;
+
+ ddt = ddt_select(zcb->zcb_spa, bp);
+ ddt_enter(ddt);
+ dde = ddt_lookup(ddt, bp, B_FALSE);
+
+ if (dde == NULL) {
+ refcnt = 0;
+ } else {
+ ddt_phys_t *ddp = ddt_phys_select(dde, bp);
+ ddt_phys_decref(ddp);
+ refcnt = ddp->ddp_refcnt;
+ if (ddt_phys_total_refcnt(dde) == 0)
+ ddt_remove(ddt, dde);
}
+ ddt_exit(ddt);
}
- if (!dump_opt['L'])
- VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
- NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+ VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
+ refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
+ bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
+/* ARGSUSED */
static int
-zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
- const dnode_phys_t *dnp, void *arg)
+zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
zdb_cb_t *zcb = arg;
char blkbuf[BP_SPRINTF_LEN];
dmu_object_type_t type;
- boolean_t is_l0_metadata;
+ boolean_t is_metadata;
if (bp == NULL)
return (0);
type = BP_GET_TYPE(bp);
- zdb_count_block(spa, zcb, bp, type);
+ zdb_count_block(zcb, zilog, bp, type);
- /*
- * if we do metadata-only checksumming there's no need to checksum
- * indirect blocks here because it is done during traverse
- */
- is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES &&
- dmu_ot[type].ot_metadata);
+ is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
+
+ if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
+ int ioerr;
+ size_t size = BP_GET_PSIZE(bp);
+ void *data = malloc(size);
+ int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
- if (dump_opt['c'] > 1 || dump_opt['S'] ||
- (dump_opt['c'] && is_l0_metadata)) {
- int ioerr, size;
- void *data;
+ /* If it's an intent log block, failure is expected. */
+ if (zb->zb_level == ZB_ZIL_LEVEL)
+ flags |= ZIO_FLAG_SPECULATIVE;
- size = BP_GET_LSIZE(bp);
- data = malloc(size);
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
+ NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
+
free(data);
- /* We expect io errors on intent log */
- if (ioerr && type != DMU_OT_INTENT_LOG) {
+ if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
else
blkbuf[0] = '\0';
- if (!dump_opt['S']) {
- (void) printf("zdb_blkptr_cb: "
- "Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
- ioerr,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf);
- }
+ (void) printf("zdb_blkptr_cb: "
+ "Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+ ioerr,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf);
}
}
zcb->zcb_readfails = 0;
if (dump_opt['b'] >= 4) {
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
- (void) printf("objset %llu object %llu offset 0x%llx %s\n",
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("objset %llu object %llu "
+ "level %lld offset 0x%llx %s\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
+ (longlong_t)zb->zb_level,
+ (u_longlong_t)blkid2offset(dnp, bp, zb),
blkbuf);
}
return (0);
}
+static void
+zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ vdev_t *vd = sm->sm_ppd;
+
+ (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
+ (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_load(space_map_t *sm)
+{
+}
+
+static void
+zdb_space_map_unload(space_map_t *sm)
+{
+ space_map_vacate(sm, zdb_leak, sm);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
+{
+}
+
+static space_map_ops_t zdb_space_map_ops = {
+ zdb_space_map_load,
+ zdb_space_map_unload,
+ NULL, /* alloc */
+ zdb_space_map_claim,
+ NULL, /* free */
+ NULL /* maxsize */
+};
+
+static void
+zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ ddt_bookmark_t ddb = { 0 };
+ ddt_entry_t dde;
+ int error;
+
+ while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
+ blkptr_t blk;
+ ddt_phys_t *ddp = dde.dde_phys;
+
+ if (ddb.ddb_class == DDT_CLASS_UNIQUE)
+ return;
+
+ ASSERT(ddt_phys_total_refcnt(&dde) > 1);
+
+ for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ if (ddp->ddp_phys_birth == 0)
+ continue;
+ ddt_bp_create(ddb.ddb_checksum,
+ &dde.dde_key, ddp, &blk);
+ if (p == DDT_PHYS_DITTO) {
+ zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
+ } else {
+ zcb->zcb_dedup_asize +=
+ BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
+ zcb->zcb_dedup_blocks++;
+ }
+ }
+ if (!dump_opt['L']) {
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ ddt_enter(ddt);
+ VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+ ddt_exit(ddt);
+ }
+ }
+
+ ASSERT(error == ENOENT);
+}
+
+static void
+zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ zcb->zcb_spa = spa;
+
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ VERIFY(space_map_load(&msp->ms_map,
+ &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
+ spa->spa_meta_objset) == 0);
+ msp->ms_map.sm_ppd = vd;
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ zdb_ddt_leak_init(spa, zcb);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+static void
+zdb_leak_fini(spa_t *spa)
+{
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+}
+
+/* ARGSUSED */
+static int
+count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ zdb_cb_t *zcb = arg;
+
+ if (dump_opt['b'] >= 4) {
+ char blkbuf[BP_SPRINTF_LEN];
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("[%s] %s\n",
+ "deferred free", blkbuf);
+ }
+ zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
+ return (0);
+}
+
static int
dump_block_stats(spa_t *spa)
{
zdb_cb_t zcb = { 0 };
zdb_blkstats_t *zb, *tzb;
- uint64_t alloc, space, logalloc;
- vdev_t *rvd = spa->spa_root_vdev;
+ uint64_t norm_alloc, norm_space, total_alloc, total_found;
+ int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
- int c, e;
- if (!dump_opt['S']) {
- (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
- (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
- (dump_opt['c'] == 1) ? "metadata " : "",
- dump_opt['c'] ? "checksums " : "",
- (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
- !dump_opt['L'] ? "nothing leaked " : "");
- }
+ (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+ (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
+ (dump_opt['c'] == 1) ? "metadata " : "",
+ dump_opt['c'] ? "checksums " : "",
+ (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
+ !dump_opt['L'] ? "nothing leaked " : "");
/*
* Load all space maps as SM_ALLOC maps, then traverse the pool
@@ -1654,39 +2183,25 @@ dump_block_stats(spa_t *spa)
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
- if (!dump_opt['L'])
- zdb_leak_init(spa);
+ zdb_leak_init(spa, &zcb);
/*
* If there's a deferred-free bplist, process that first.
*/
- if (spa->spa_sync_bplist_obj != 0) {
- bplist_t *bpl = &spa->spa_sync_bplist;
- blkptr_t blk;
- uint64_t itor = 0;
+ (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
+ count_block_cb, &zcb, NULL);
+ (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
+ count_block_cb, &zcb, NULL);
- VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
- spa->spa_sync_bplist_obj));
+ if (dump_opt['c'] > 1)
+ flags |= TRAVERSE_PREFETCH_DATA;
- while (bplist_iterate(bpl, &itor, &blk) == 0) {
- if (dump_opt['b'] >= 4) {
- char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
- (void) printf("[%s] %s\n",
- "deferred free", blkbuf);
- }
- zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
- }
-
- bplist_close(bpl);
- }
-
- zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
+ zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
- if (zcb.zcb_haderrors && !dump_opt['S']) {
+ if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
- for (e = 0; e < 256; e++) {
+ for (int e = 0; e < 256; e++) {
if (zcb.zcb_errors[e] != 0) {
(void) printf("\t%5d %llu\n",
e, (u_longlong_t)zcb.zcb_errors[e]);
@@ -1697,43 +2212,27 @@ dump_block_stats(spa_t *spa)
/*
* Report any leaked segments.
*/
- if (!dump_opt['L'])
- zdb_leak_fini(spa);
+ zdb_leak_fini(spa);
- /*
- * If we're interested in printing out the blkptr signatures,
- * return now as we don't print out anything else (including
- * errors and leaks).
- */
- if (dump_opt['S'])
- return (zcb.zcb_haderrors ? 3 : 0);
-
- alloc = spa_get_alloc(spa);
- space = spa_get_space(spa);
-
- /*
- * Log blocks allocated from a separate log device don't count
- * as part of the normal pool space; factor them in here.
- */
- logalloc = 0;
+ tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
- for (c = 0; c < rvd->vdev_children; c++)
- if (rvd->vdev_child[c]->vdev_islog)
- logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
+ norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ norm_space = metaslab_class_get_space(spa_normal_class(spa));
- tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
+ total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
+ total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
- if (tzb->zb_asize == alloc + logalloc) {
+ if (total_found == total_alloc) {
if (!dump_opt['L'])
(void) printf("\n\tNo leaks (block sum matches space"
" maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
- (u_longlong_t)tzb->zb_asize,
- (u_longlong_t)alloc + logalloc,
+ (u_longlong_t)total_found,
+ (u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
- (longlong_t)(alloc + logalloc - tzb->zb_asize));
+ (longlong_t)(total_alloc - total_found));
leaks = 1;
}
@@ -1743,33 +2242,41 @@ dump_block_stats(spa_t *spa)
(void) printf("\n");
(void) printf("\tbp count: %10llu\n",
(u_longlong_t)tzb->zb_count);
- (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
+ (void) printf("\tbp logical: %10llu avg: %6llu\n",
(u_longlong_t)tzb->zb_lsize,
(u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
- (void) printf("\tbp physical: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp physical: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_psize,
(u_longlong_t)(tzb->zb_psize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_psize);
- (void) printf("\tbp allocated: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp allocated: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_asize,
(u_longlong_t)(tzb->zb_asize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_asize);
- (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
- (u_longlong_t)alloc, 100.0 * alloc / space);
+ (void) printf("\tbp deduped: %10llu ref>1:"
+ " %6llu deduplication: %6.2f\n",
+ (u_longlong_t)zcb.zcb_dedup_asize,
+ (u_longlong_t)zcb.zcb_dedup_blocks,
+ (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
+ (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
+ (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
if (dump_opt['b'] >= 2) {
int l, t, level;
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
"\t avg\t comp\t%%Total\tType\n");
- for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
- char csize[6], lsize[6], psize[6], asize[6], avg[6];
+ for (t = 0; t <= ZDB_OT_TOTAL; t++) {
+ char csize[32], lsize[32], psize[32], asize[32];
+ char avg[32];
char *typename;
- typename = t == DMU_OT_DEFERRED ? "deferred free" :
- t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
+ if (t < DMU_OT_NUMTYPES)
+ typename = dmu_ot[t].ot_name;
+ else
+ typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
(void) printf("%6s\t%5s\t%5s\t%5s"
@@ -1799,11 +2306,11 @@ dump_block_stats(spa_t *spa)
zcb.zcb_type[ZB_TOTAL][t].zb_asize)
continue;
- nicenum(zb->zb_count, csize);
- nicenum(zb->zb_lsize, lsize);
- nicenum(zb->zb_psize, psize);
- nicenum(zb->zb_asize, asize);
- nicenum(zb->zb_asize / zb->zb_count, avg);
+ zdb_nicenum(zb->zb_count, csize);
+ zdb_nicenum(zb->zb_lsize, lsize);
+ zdb_nicenum(zb->zb_psize, psize);
+ zdb_nicenum(zb->zb_asize, asize);
+ zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
"\t%5.2f\t%6.2f\t",
@@ -1831,36 +2338,157 @@ dump_block_stats(spa_t *spa)
return (0);
}
+typedef struct zdb_ddt_entry {
+ ddt_key_t zdde_key;
+ uint64_t zdde_ref_blocks;
+ uint64_t zdde_ref_lsize;
+ uint64_t zdde_ref_psize;
+ uint64_t zdde_ref_dsize;
+ avl_node_t zdde_node;
+} zdb_ddt_entry_t;
+
+/* ARGSUSED */
+static int
+zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ avl_tree_t *t = arg;
+ avl_index_t where;
+ zdb_ddt_entry_t *zdde, zdde_search;
+
+ if (bp == NULL)
+ return (0);
+
+ if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
+ (void) printf("traversing objset %llu, %llu objects, "
+ "%lu blocks so far\n",
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)bp->blk_fill,
+ avl_numnodes(t));
+ }
+
+ if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
+ BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
+ return (0);
+
+ ddt_key_fill(&zdde_search.zdde_key, bp);
+
+ zdde = avl_find(t, &zdde_search, &where);
+
+ if (zdde == NULL) {
+ zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
+ zdde->zdde_key = zdde_search.zdde_key;
+ avl_insert(t, zdde, where);
+ }
+
+ zdde->zdde_ref_blocks += 1;
+ zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
+ zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
+ zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
+
+ return (0);
+}
+
+static void
+dump_simulated_ddt(spa_t *spa)
+{
+ avl_tree_t t;
+ void *cookie = NULL;
+ zdb_ddt_entry_t *zdde;
+ ddt_histogram_t ddh_total = { 0 };
+ ddt_stat_t dds_total = { 0 };
+
+ avl_create(&t, ddt_entry_compare,
+ sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
+ zdb_ddt_add_cb, &t);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+ while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
+ ddt_stat_t dds;
+ uint64_t refcnt = zdde->zdde_ref_blocks;
+ ASSERT(refcnt != 0);
+
+ dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
+ dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
+ dds.dds_psize = zdde->zdde_ref_psize / refcnt;
+ dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
+
+ dds.dds_ref_blocks = zdde->zdde_ref_blocks;
+ dds.dds_ref_lsize = zdde->zdde_ref_lsize;
+ dds.dds_ref_psize = zdde->zdde_ref_psize;
+ dds.dds_ref_dsize = zdde->zdde_ref_dsize;
+
+ ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
+
+ umem_free(zdde, sizeof (*zdde));
+ }
+
+ avl_destroy(&t);
+
+ ddt_histogram_stat(&dds_total, &ddh_total);
+
+ (void) printf("Simulated DDT histogram:\n");
+
+ zpool_dump_ddt(&dds_total, &ddh_total);
+
+ dump_dedup_ratio(&dds_total);
+}
+
static void
dump_zpool(spa_t *spa)
{
dsl_pool_t *dp = spa_get_dsl(spa);
int rc = 0;
+ if (dump_opt['S']) {
+ dump_simulated_ddt(spa);
+ return;
+ }
+
+ if (!dump_opt['e'] && dump_opt['C'] > 1) {
+ (void) printf("\nCached configuration:\n");
+ dump_nvlist(spa->spa_config, 8);
+ }
+
+ if (dump_opt['C'])
+ dump_config(spa);
+
if (dump_opt['u'])
- dump_uberblock(&spa->spa_uberblock);
+ dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
- if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) {
+ if (dump_opt['D'])
+ dump_all_ddts(spa);
+
+ if (dump_opt['d'] > 2 || dump_opt['m'])
+ dump_metaslabs(spa);
+
+ if (dump_opt['d'] || dump_opt['i']) {
dump_dir(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
- dump_bplist(dp->dp_meta_objset,
- spa->spa_sync_bplist_obj, "Deferred frees");
+ dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
+ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
+ "Pool frees");
+ }
dump_dtl(spa->spa_root_vdev, 0);
}
-
- if (dump_opt['d'] >= 3 || dump_opt['m'])
- dump_metaslabs(spa);
-
- (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
- DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+ (void) dmu_objset_find(spa_name(spa), dump_one_dir,
+ NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
}
-
- if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
+ if (dump_opt['b'] || dump_opt['c'])
rc = dump_block_stats(spa);
if (dump_opt['s'])
show_pool_stats(spa);
+ if (dump_opt['h'])
+ dump_history(spa);
+
if (rc != 0)
exit(rc);
}
@@ -1879,51 +2507,13 @@ int flagbits[256];
static void
zdb_print_blkptr(blkptr_t *bp, int flags)
{
- dva_t *dva = bp->blk_dva;
- int d;
+ char blkbuf[BP_SPRINTF_LEN];
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
- /*
- * Super-ick warning: This code is also duplicated in
- * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
- * replication, too.
- */
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
- (longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]));
- (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
- "ASIZE: %llx\n", d,
- DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
- (longlong_t)DVA_GET_GRID(&dva[d]),
- (longlong_t)DVA_GET_ASIZE(&dva[d]));
- (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
- (u_longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]),
- (longlong_t)BP_GET_PSIZE(bp),
- BP_SHOULD_BYTESWAP(bp) ? "e" : "",
- !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
- "d" : "",
- DVA_GET_GANG(&dva[d]) ? "g" : "",
- BP_GET_COMPRESS(bp) != 0 ? "d" : "");
- }
- (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
- (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
- (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
- BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
- dmu_ot[BP_GET_TYPE(bp)].ot_name);
- (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
- (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
- (u_longlong_t)bp->blk_fill);
- (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
- (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("%s\n", blkbuf);
}
static void
@@ -1946,7 +2536,7 @@ zdb_dump_block_raw(void *buf, uint64_t size, int flags)
{
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array(buf, size);
- (void) write(2, buf, size);
+ (void) write(1, buf, size);
}
static void
@@ -2049,31 +2639,30 @@ name:
* flags - A string of characters specifying options
* b: Decode a blkptr at given offset within block
* *c: Calculate and display checksums
- * *d: Decompress data before dumping
+ * d: Decompress data before dumping
* e: Byteswap data before dumping
- * *g: Display data as a gang block header
- * *i: Display as an indirect block
+ * g: Display data as a gang block header
+ * i: Display as an indirect block
* p: Do I/O to physical offset
* r: Dump raw data to stdout
*
* * = not yet implemented
*/
static void
-zdb_read_block(char *thing, spa_t **spap)
+zdb_read_block(char *thing, spa_t *spa)
{
- spa_t *spa = *spap;
+ blkptr_t blk, *bp = &blk;
+ dva_t *dva = bp->blk_dva;
int flags = 0;
- uint64_t offset = 0, size = 0, blkptr_offset = 0;
+ uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
- void *buf;
- char *s, *p, *dup, *pool, *vdev, *flagstr;
- int i, error, zio_flags;
+ void *pbuf, *lbuf, *buf;
+ char *s, *p, *dup, *vdev, *flagstr;
+ int i, error;
dup = strdup(thing);
s = strtok(dup, ":");
- pool = s ? s : "";
- s = strtok(NULL, ":");
vdev = s ? s : "";
s = strtok(NULL, ":");
offset = strtoull(s ? s : "", NULL, 16);
@@ -2107,7 +2696,7 @@ zdb_read_block(char *thing, spa_t **spap)
flags |= bit;
/* If it's not something with an argument, keep going */
- if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
+ if ((bit & (ZDB_FLAG_CHECKSUM |
ZDB_FLAG_PRINT_BLKPTR)) == 0)
continue;
@@ -2122,16 +2711,6 @@ zdb_read_block(char *thing, spa_t **spap)
}
}
- if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- error = spa_open(pool, spap, (void *)zdb_read_block);
- if (error)
- fatal("Failed to open pool '%s': %s",
- pool, strerror(error));
- spa = *spap;
- }
-
vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
if (vd == NULL) {
(void) printf("***Invalid vdev: %s\n", vdev);
@@ -2139,22 +2718,58 @@ zdb_read_block(char *thing, spa_t **spap)
return;
} else {
if (vd->vdev_path)
- (void) printf("Found vdev: %s\n", vd->vdev_path);
+ (void) fprintf(stderr, "Found vdev: %s\n",
+ vd->vdev_path);
else
- (void) printf("Found vdev type: %s\n",
+ (void) fprintf(stderr, "Found vdev type: %s\n",
vd->vdev_ops->vdev_op_type);
}
- buf = umem_alloc(size, UMEM_NOFAIL);
+ psize = size;
+ lsize = size;
+
+ pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
- zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
- ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
+ BP_ZERO(bp);
+
+ DVA_SET_VDEV(&dva[0], vd->vdev_id);
+ DVA_SET_OFFSET(&dva[0], offset);
+ DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
+ DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
+
+ BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
+
+ BP_SET_LSIZE(bp, lsize);
+ BP_SET_PSIZE(bp, psize);
+ BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
+ BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
+ BP_SET_TYPE(bp, DMU_OT_NONE);
+ BP_SET_LEVEL(bp, 0);
+ BP_SET_DEDUP(bp, 0);
+ BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
zio = zio_root(spa, NULL, NULL, 0);
- /* XXX todo - cons up a BP so RAID-Z will be happy */
- zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
- ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
+
+ if (vd == vd->vdev_top) {
+ /*
+ * Treat this as a normal block read.
+ */
+ zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
+ } else {
+ /*
+ * Treat this as a vdev child I/O.
+ */
+ zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
+ ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
+ }
+
error = zio_wait(zio);
spa_config_exit(spa, SCL_STATE, FTAG);
@@ -2163,6 +2778,52 @@ zdb_read_block(char *thing, spa_t **spap)
goto out;
}
+ if (flags & ZDB_FLAG_DECOMPRESS) {
+ /*
+ * We don't know how the data was compressed, so just try
+ * every decompress function at every inflated blocksize.
+ */
+ enum zio_compress c;
+ void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+
+ bcopy(pbuf, pbuf2, psize);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
+ lsize -= SPA_MINBLOCKSIZE) {
+ for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
+ if (zio_decompress_data(c, pbuf, lbuf,
+ psize, lsize) == 0 &&
+ zio_decompress_data(c, pbuf2, lbuf2,
+ psize, lsize) == 0 &&
+ bcmp(lbuf, lbuf2, lsize) == 0)
+ break;
+ }
+ if (c != ZIO_COMPRESS_FUNCTIONS)
+ break;
+ lsize -= SPA_MINBLOCKSIZE;
+ }
+
+ umem_free(pbuf2, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf2, SPA_MAXBLOCKSIZE);
+
+ if (lsize <= psize) {
+ (void) printf("Decompress of %s failed\n", thing);
+ goto out;
+ }
+ buf = lbuf;
+ size = lsize;
+ } else {
+ buf = pbuf;
+ size = psize;
+ }
+
if (flags & ZDB_FLAG_PRINT_BLKPTR)
zdb_print_blkptr((blkptr_t *)(void *)
((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
@@ -2177,134 +2838,92 @@ zdb_read_block(char *thing, spa_t **spap)
zdb_dump_block(thing, buf, size, flags);
out:
- umem_free(buf, size);
+ umem_free(pbuf, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
static boolean_t
-nvlist_string_match(nvlist_t *config, char *name, char *tgt)
+pool_match(nvlist_t *cfg, char *tgt)
{
+ uint64_t v, guid = strtoull(tgt, NULL, 0);
char *s;
- if (nvlist_lookup_string(config, name, &s) != 0)
- return (B_FALSE);
-
- return (strcmp(s, tgt) == 0);
-}
-
-static boolean_t
-nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
-{
- uint64_t val;
-
- if (nvlist_lookup_uint64(config, name, &val) != 0)
- return (B_FALSE);
-
- return (val == tgt);
-}
-
-static boolean_t
-vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c)
- if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
- return (B_TRUE);
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_child_string_match(nvlist_t *vdev, char *tgt)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c) {
- if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
- nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_guid_match(nvlist_t *config, uint64_t guid)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
- vdev_child_guid_match(nvroot, guid));
-}
-
-static boolean_t
-vdev_string_match(nvlist_t *config, char *tgt)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (vdev_child_string_match(nvroot, tgt));
-}
-
-static boolean_t
-pool_match(nvlist_t *config, char *tgt)
-{
- uint64_t guid = strtoull(tgt, NULL, 0);
-
if (guid != 0) {
- return (
- nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
- vdev_guid_match(config, guid));
+ if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
+ return (v == guid);
} else {
- return (
- nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
- vdev_string_match(config, tgt));
+ if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
+ return (strcmp(s, tgt) == 0);
}
+ return (B_FALSE);
}
-static int
-find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
+static char *
+find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
{
nvlist_t *pools;
- int error = ENOENT;
nvlist_t *match = NULL;
+ char *name = NULL;
+ char *sepp = NULL;
+ char sep;
+ int count = 0;
+ importargs_t args = { 0 };
- if (vdev_dir != NULL)
- pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
- else
- pools = zpool_find_import_activeok(g_zfs, 0, NULL);
+ args.paths = dirc;
+ args.path = dirv;
+ args.can_be_active = B_TRUE;
+
+ if ((sepp = strpbrk(*target, "/@")) != NULL) {
+ sep = *sepp;
+ *sepp = '\0';
+ }
+
+ pools = zpool_search_import(g_zfs, &args);
if (pools != NULL) {
nvpair_t *elem = NULL;
-
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
verify(nvpair_value_nvlist(elem, configp) == 0);
- if (pool_match(*configp, pool_id)) {
+ if (pool_match(*configp, *target)) {
+ count++;
if (match != NULL) {
- (void) fatal(
- "More than one matching pool - "
- "specify guid/devid/device path.");
+ /* print previously found config */
+ if (name != NULL) {
+ (void) printf("%s\n", name);
+ dump_nvlist(match, 8);
+ name = NULL;
+ }
+ (void) printf("%s\n",
+ nvpair_name(elem));
+ dump_nvlist(*configp, 8);
} else {
match = *configp;
- error = 0;
+ name = nvpair_name(elem);
}
}
}
}
+ if (count > 1)
+ (void) fatal("\tMatched %d pools - use pool GUID "
+ "instead of pool name or \n"
+ "\tpool name part of a dataset name to select pool", count);
- *configp = error ? NULL : match;
+ if (sepp)
+ *sepp = sep;
+ /*
+ * If pool GUID was specified for pool id, replace it with pool name
+ */
+ if (name && (strstr(*target, name) != *target)) {
+ int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
+
+ *target = umem_alloc(sz, UMEM_NOFAIL);
+ (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
+ }
- return (error);
+ *configp = name ? match : NULL;
+
+ return (name);
}
int
@@ -2312,83 +2931,85 @@ main(int argc, char **argv)
{
int i, c;
struct rlimit rl = { 1024, 1024 };
- spa_t *spa;
+ spa_t *spa = NULL;
objset_t *os = NULL;
- char *endstr;
int dump_all = 1;
int verbose = 0;
- int error;
- int exported = 0;
- char *vdev_dir = NULL;
+ int error = 0;
+ char **searchdirs = NULL;
+ int nsearch = 0;
+ char *target;
+ nvlist_t *policy = NULL;
+ uint64_t max_txg = UINT64_MAX;
+ int rewind = ZPOOL_NEVER_REWIND;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) {
+ while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
- case 'u':
- case 'd':
- case 'i':
case 'b':
case 'c':
+ case 'd':
+ case 'h':
+ case 'i':
+ case 'l':
case 'm':
case 's':
+ case 'u':
case 'C':
- case 'l':
+ case 'D':
case 'R':
+ case 'S':
dump_opt[c]++;
dump_all = 0;
break;
+ case 'A':
+ case 'F':
case 'L':
+ case 'X':
+ case 'e':
+ case 'P':
dump_opt[c]++;
break;
case 'v':
verbose++;
break;
- case 'U':
- spa_config_path = optarg;
- break;
- case 'e':
- exported = 1;
- break;
case 'p':
- vdev_dir = optarg;
- break;
- case 'S':
- dump_opt[c]++;
- dump_all = 0;
- zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
- if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
- usage();
- endstr = strchr(optarg, ':') + 1;
- if (strcmp(endstr, "fletcher2") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else if (strcmp(endstr, "fletcher4") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
- else if (strcmp(endstr, "sha256") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
- else if (strcmp(endstr, "all") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else
- usage();
+ if (searchdirs == NULL) {
+ searchdirs = umem_alloc(sizeof (char *),
+ UMEM_NOFAIL);
+ } else {
+ char **tmp = umem_alloc((nsearch + 1) *
+ sizeof (char *), UMEM_NOFAIL);
+ bcopy(searchdirs, tmp, nsearch *
+ sizeof (char *));
+ umem_free(searchdirs,
+ nsearch * sizeof (char *));
+ searchdirs = tmp;
+ }
+ searchdirs[nsearch++] = optarg;
break;
case 't':
- ub_max_txg = strtoull(optarg, NULL, 0);
- if (ub_max_txg < TXG_INITIAL) {
+ max_txg = strtoull(optarg, NULL, 0);
+ if (max_txg < TXG_INITIAL) {
(void) fprintf(stderr, "incorrect txg "
"specified: %s\n", optarg);
usage();
}
break;
+ case 'U':
+ spa_config_path = optarg;
+ break;
default:
usage();
break;
}
}
- if (vdev_dir != NULL && exported == 0) {
+ if (!dump_opt['e'] && searchdirs != NULL) {
(void) fprintf(stderr, "-p option requires use of -e\n");
usage();
}
@@ -2397,18 +3018,26 @@ main(int argc, char **argv)
g_zfs = libzfs_init();
ASSERT(g_zfs != NULL);
+ if (dump_all)
+ verbose = MAX(verbose, 1);
+
for (c = 0; c < 256; c++) {
- if (dump_all && c != 'l' && c != 'R')
+ if (dump_all && !strchr("elAFLRSXP", c))
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
}
+ aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
+ zfs_recover = (dump_opt['A'] > 1);
+
argc -= optind;
argv += optind;
+ if (argc < 2 && dump_opt['R'])
+ usage();
if (argc < 1) {
- if (dump_opt['C']) {
+ if (!dump_opt['e'] && dump_opt['C']) {
dump_cachefile(spa_config_path);
return (0);
}
@@ -2420,99 +3049,107 @@ main(int argc, char **argv)
return (0);
}
- if (dump_opt['R']) {
- flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
- flagbits['c'] = ZDB_FLAG_CHECKSUM;
- flagbits['d'] = ZDB_FLAG_DECOMPRESS;
- flagbits['e'] = ZDB_FLAG_BSWAP;
- flagbits['g'] = ZDB_FLAG_GBH;
- flagbits['i'] = ZDB_FLAG_INDIRECT;
- flagbits['p'] = ZDB_FLAG_PHYS;
- flagbits['r'] = ZDB_FLAG_RAW;
-
- spa = NULL;
- while (argv[0]) {
- zdb_read_block(argv[0], &spa);
- argv++;
- argc--;
- }
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- return (0);
- }
+ if (dump_opt['X'] || dump_opt['F'])
+ rewind = ZPOOL_DO_REWIND |
+ (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
- if (dump_opt['C'])
- dump_config(argv[0]);
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
+ nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
+ fatal("internal error: %s", strerror(ENOMEM));
error = 0;
- if (exported) {
- /*
- * Check to see if the name refers to an exported zpool
- */
- char *slash;
- nvlist_t *exported_conf = NULL;
-
- if ((slash = strchr(argv[0], '/')) != NULL)
- *slash = '\0';
-
- error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
- if (error == 0) {
- nvlist_t *nvl = NULL;
-
- if (vdev_dir != NULL) {
- if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
- error = ENOMEM;
- else if (nvlist_add_string(nvl,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
- vdev_dir) != 0)
- error = ENOMEM;
- }
+ target = argv[0];
- if (error == 0)
- error = spa_import_verbatim(argv[0],
- exported_conf, nvl);
+ if (dump_opt['e']) {
+ nvlist_t *cfg = NULL;
+ char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
- nvlist_free(nvl);
+ error = ENOENT;
+ if (name) {
+ if (dump_opt['C'] > 1) {
+ (void) printf("\nConfiguration for import:\n");
+ dump_nvlist(cfg, 8);
+ }
+ if (nvlist_add_nvlist(cfg,
+ ZPOOL_REWIND_POLICY, policy) != 0) {
+ fatal("can't open '%s': %s",
+ target, strerror(ENOMEM));
+ }
+ if ((error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_MISSING_LOG)) != 0) {
+ error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_VERBATIM);
+ }
}
-
- if (slash != NULL)
- *slash = '/';
}
if (error == 0) {
- if (strchr(argv[0], '/') != NULL) {
- error = dmu_objset_open(argv[0], DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
+ error = spa_open_rewind(target, &spa, FTAG, policy,
+ NULL);
+ if (error) {
+ /*
+ * If we're missing the log device then
+ * try opening the pool after clearing the
+ * log state.
+ */
+ mutex_enter(&spa_namespace_lock);
+ if ((spa = spa_lookup(target)) != NULL &&
+ spa->spa_log_state == SPA_LOG_MISSING) {
+ spa->spa_log_state = SPA_LOG_CLEAR;
+ error = 0;
+ }
+ mutex_exit(&spa_namespace_lock);
+
+ if (!error) {
+ error = spa_open_rewind(target, &spa,
+ FTAG, policy, NULL);
+ }
+ }
} else {
- error = spa_open(argv[0], &spa, FTAG);
+ error = dmu_objset_own(target, DMU_OST_ANY,
+ B_TRUE, FTAG, &os);
}
}
+ nvlist_free(policy);
if (error)
- fatal("can't open %s: %s", argv[0], strerror(error));
+ fatal("can't open '%s': %s", target, strerror(error));
argv++;
- if (--argc > 0) {
- zopt_objects = argc;
- zopt_object = calloc(zopt_objects, sizeof (uint64_t));
- for (i = 0; i < zopt_objects; i++) {
- errno = 0;
- zopt_object[i] = strtoull(argv[i], NULL, 0);
- if (zopt_object[i] == 0 && errno != 0)
- fatal("bad object number %s: %s",
- argv[i], strerror(errno));
+ argc--;
+ if (!dump_opt['R']) {
+ if (argc > 0) {
+ zopt_objects = argc;
+ zopt_object = calloc(zopt_objects, sizeof (uint64_t));
+ for (i = 0; i < zopt_objects; i++) {
+ errno = 0;
+ zopt_object[i] = strtoull(argv[i], NULL, 0);
+ if (zopt_object[i] == 0 && errno != 0)
+ fatal("bad number %s: %s",
+ argv[i], strerror(errno));
+ }
}
- }
-
- if (os != NULL) {
- dump_dir(os);
- dmu_objset_close(os);
+ (os != NULL) ? dump_dir(os) : dump_zpool(spa);
} else {
- dump_zpool(spa);
- spa_close(spa, FTAG);
+ flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
+ flagbits['c'] = ZDB_FLAG_CHECKSUM;
+ flagbits['d'] = ZDB_FLAG_DECOMPRESS;
+ flagbits['e'] = ZDB_FLAG_BSWAP;
+ flagbits['g'] = ZDB_FLAG_GBH;
+ flagbits['i'] = ZDB_FLAG_INDIRECT;
+ flagbits['p'] = ZDB_FLAG_PHYS;
+ flagbits['r'] = ZDB_FLAG_RAW;
+
+ for (i = 0; i < argc; i++)
+ zdb_read_block(argv[i], spa);
}
+ (os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
+
fuid_table_destroy();
+ sa_loaded = B_FALSE;
libzfs_fini(g_zfs);
kernel_fini();
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index 1b3c18f..a0ed985 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -40,12 +40,14 @@
extern uint8_t dump_opt[256];
+static char prefix[4] = "\t\t\t";
+
static void
print_log_bp(const blkptr_t *bp, const char *prefix)
{
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
(void) printf("%s%s\n", prefix, blkbuf);
}
@@ -54,19 +56,29 @@ static void
zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
{
time_t crtime = lr->lr_crtime[0];
- char *name = (char *)(lr + 1);
- char *link = name + strlen(name) + 1;
+ char *name, *link;
+ lr_attr_t *lrattr;
- if (txtype == TX_SYMLINK)
- (void) printf("\t\t\t%s -> %s\n", name, link);
- else
- (void) printf("\t\t\t%s\n", name);
+ name = (char *)(lr + 1);
+
+ if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
+ lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
+ lrattr = (lr_attr_t *)(lr + 1);
+ name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
+ }
+
+ if (txtype == TX_SYMLINK) {
+ link = name + strlen(name) + 1;
+ (void) printf("%s%s -> %s\n", prefix, name, link);
+ } else if (txtype != TX_MKXATTR) {
+ (void) printf("%s%s\n", prefix, name);
+ }
- (void) printf("\t\t\t%s", ctime(&crtime));
- (void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
+ (void) printf("%s%s", prefix, ctime(&crtime));
+ (void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
(longlong_t)lr->lr_mode);
- (void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
+ (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
(u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
(u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
}
@@ -75,7 +87,7 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
static void
zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
{
- (void) printf("\t\t\tdoid %llu, name %s\n",
+ (void) printf("%sdoid %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (char *)(lr + 1));
}
@@ -83,7 +95,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
static void
zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
{
- (void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
+ (void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
(char *)(lr + 1));
}
@@ -95,9 +107,9 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
char *snm = (char *)(lr + 1);
char *tnm = snm + strlen(snm) + 1;
- (void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
+ (void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
- (void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
+ (void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
}
/* ARGSUSED */
@@ -106,44 +118,48 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
char *data, *dlimit;
blkptr_t *bp = &lr->lr_blkptr;
+ zbookmark_t zb;
char buf[SPA_MAXBLOCKSIZE];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;
- (void) printf("\t\t\tfoid %llu, offset 0x%llx,"
- " length 0x%llx, blkoff 0x%llx\n",
- (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
- (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
+ (void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
+ (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
+ (u_longlong_t)lr->lr_length);
if (txtype == TX_WRITE2 || verbose < 5)
return;
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
- (void) printf("\t\t\thas blkptr, %s\n",
+ (void) printf("%shas blkptr, %s\n", prefix,
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
- print_log_bp(bp, "\t\t\t");
+ print_log_bp(bp, prefix);
+
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
}
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
- } else {
- zbookmark_t zb;
-
- zb.zb_objset = dmu_objset_id(zilog->zl_os);
- zb.zb_object = lr->lr_foid;
- zb.zb_level = 0;
- zb.zb_blkid = -1; /* unknown */
-
- error = zio_wait(zio_read(NULL, zilog->zl_spa,
- bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
- if (error)
- return;
+ (void) printf("%s<hole>\n", prefix);
+ return;
+ }
+ if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
+ (void) printf("%s<block already committed>\n", prefix);
+ return;
}
- data = buf + lr->lr_blkoff;
+
+ SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
+ lr->lr_foid, ZB_ZIL_LEVEL,
+ lr->lr_offset / BP_GET_LSIZE(bp));
+
+ error = zio_wait(zio_read(NULL, zilog->zl_spa,
+ bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
+ if (error)
+ return;
+ data = buf;
} else {
data = (char *)(lr + 1);
}
@@ -151,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
dlimit = data + MIN(lr->lr_length,
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
- (void) printf("\t\t\t");
+ (void) printf("%s", prefix);
while (data < dlimit) {
if (isprint(*data))
(void) printf("%c ", *data);
@@ -166,7 +182,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
static void
zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
{
- (void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
+ (void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length);
}
@@ -178,38 +194,38 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
time_t atime = (time_t)lr->lr_atime[0];
time_t mtime = (time_t)lr->lr_mtime[0];
- (void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
+ (void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
if (lr->lr_mask & AT_MODE) {
- (void) printf("\t\t\tAT_MODE %llo\n",
+ (void) printf("%sAT_MODE %llo\n", prefix,
(longlong_t)lr->lr_mode);
}
if (lr->lr_mask & AT_UID) {
- (void) printf("\t\t\tAT_UID %llu\n",
+ (void) printf("%sAT_UID %llu\n", prefix,
(u_longlong_t)lr->lr_uid);
}
if (lr->lr_mask & AT_GID) {
- (void) printf("\t\t\tAT_GID %llu\n",
+ (void) printf("%sAT_GID %llu\n", prefix,
(u_longlong_t)lr->lr_gid);
}
if (lr->lr_mask & AT_SIZE) {
- (void) printf("\t\t\tAT_SIZE %llu\n",
+ (void) printf("%sAT_SIZE %llu\n", prefix,
(u_longlong_t)lr->lr_size);
}
if (lr->lr_mask & AT_ATIME) {
- (void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
+ (void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_atime[0],
(u_longlong_t)lr->lr_atime[1],
ctime(&atime));
}
if (lr->lr_mask & AT_MTIME) {
- (void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
+ (void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_mtime[0],
(u_longlong_t)lr->lr_mtime[1],
ctime(&mtime));
@@ -220,7 +236,7 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
{
- (void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
+ (void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
}
@@ -256,7 +272,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
};
/* ARGSUSED */
-static void
+static int
print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
{
int txtype;
@@ -280,23 +296,24 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++;
+
+ return (0);
}
/* ARGSUSED */
-static void
+static int
print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
- char blkbuf[BP_SPRINTF_LEN];
+ char blkbuf[BP_SPRINTF_LEN + 10];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
char *claim;
if (verbose <= 3)
- return;
+ return (0);
if (verbose >= 5) {
(void) strcpy(blkbuf, ", ");
- sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), bp);
+ sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
} else {
blkbuf[0] = '\0';
}
@@ -310,6 +327,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
(void) printf("\tBlock seqno %llu, %s%s\n",
(u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
+
+ return (0);
}
static void
@@ -342,17 +361,17 @@ dump_intent_log(zilog_t *zilog)
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int i;
- if (zh->zh_log.blk_birth == 0 || verbose < 2)
+ if (zh->zh_log.blk_birth == 0 || verbose < 1)
return;
- (void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
- (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
+ (void) printf("\n ZIL header: claim_txg %llu, "
+ "claim_blk_seq %llu, claim_lr_seq %llu",
+ (u_longlong_t)zh->zh_claim_txg,
+ (u_longlong_t)zh->zh_claim_blk_seq,
+ (u_longlong_t)zh->zh_claim_lr_seq);
(void) printf(" replay_seq %llu, flags 0x%llx\n",
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
- if (verbose >= 4)
- print_log_bp(&zh->zh_log, "\n\tfirst block: ");
-
for (i = 0; i < TX_MAX_TYPE; i++)
zil_rec_info[i].zri_count = 0;
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
index 0d97026..0d40a90 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -6,7 +6,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zfs 1M "5 May 2009" "SunOS 5.11" "System Administration Commands"
+.TH zfs 1M "24 Sep 2009" "SunOS 5.11" "System Administration Commands"
.SH NAME
zfs \- configures ZFS file systems
.SH SYNOPSIS
@@ -27,7 +27,12 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBdestroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBdestroy\fR [\fB-rRd\fR] \fIsnapshot\fR
.fi
.LP
@@ -75,7 +80,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|snapshot ...
+\fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
.fi
.LP
@@ -174,7 +179,7 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBallow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
+\fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
@@ -195,7 +200,29 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...
+.fi
+
+\fBzfs\fR \fBjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
+.fi
+.LP
+.nf
+\fBzfs\fR \fBunjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
.fi
.SH DESCRIPTION
@@ -212,7 +239,7 @@ pool/{filesystem,volume,snapshot}
.sp
.LP
-\&...where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
+where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
.sp
.LP
A dataset can be one of the following:
@@ -224,7 +251,7 @@ A dataset can be one of the following:
.ad
.sp .6
.RS 4n
-A \fBZFS\fR dataset of type "filesystem" that can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space.
+A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space.
.RE
.sp
@@ -268,17 +295,17 @@ A snapshot is a read-only copy of a file system or volume. Snapshots can be crea
Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently.
.sp
.LP
-File system snapshots can be accessed under the ".zfs/snapshot" directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the ".zfs" directory can be controlled by the "snapdir" property.
+File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property.
.SS "Clones"
.sp
.LP
A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space.
.sp
.LP
-Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The "origin" property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
+Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
.sp
.LP
-The clone parent-child dependency relationship can be reversed by using the "\fBpromote\fR" subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from.
+The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from.
.SS "Mount Points"
.sp
.LP
@@ -304,10 +331,10 @@ A \fBZFS\fR file system can be added to a non-global zone by using the \fBzonecf
The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
.sp
.LP
-A dataset can also be delegated to a non-global zone by using \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator.
+A dataset can also be delegated to a non-global zone by using the \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator.
.sp
.LP
-A \fBZFS\fR volume can be added as a device to a non-global zone by using \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator.
+A \fBZFS\fR volume can be added as a device to a non-global zone by using the \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator.
.sp
.LP
For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
@@ -320,7 +347,7 @@ The global administrator can forcibly clear the \fBzoned\fR property, though thi
.SS "Native Properties"
.sp
.LP
-Properties are divided into two types, native and user-defined (or "user"). Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below.
+Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below.
.sp
.LP
Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots).
@@ -380,6 +407,17 @@ The time this dataset was created.
.ne 2
.mk
.na
+\fB\fBdefer_destroy\fR\fR
+.ad
+.sp .6
+.RS 4n
+This property is \fBon\fR if the snapshot has been marked for deferred destroy by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBmounted\fR\fR
.ad
.sp .6
@@ -489,7 +527,7 @@ The amount of space used by a \fBrefreservation\fR set on this dataset, which wo
.ad
.sp .6
.RS 4n
-The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots
+The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots.
.RE
.sp
@@ -500,34 +538,34 @@ The amount of space consumed by snapshots of this dataset. In particular, it is
.ad
.sp .6
.RS 4n
-The amount of space referenced in this dataset by the specified user. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information.
+The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information.
.sp
Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage.
.sp
-This property cannot be set on volumes, or on pools before version 15. The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix name\fR (for example, \fBjoe\fR)
+\fIPOSIX name\fR (for example, \fBjoe\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix numeric id\fR (for example, \fB789\fR)
+\fIPOSIX numeric ID\fR (for example, \fB789\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+\fISID name\fR (for example, \fBjoe.smith@mydomain\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR)
.RE
.RE
@@ -535,13 +573,24 @@ This property cannot be set on volumes, or on pools before version 15. The \fBus
.ne 2
.mk
.na
+\fB\fBuserrefs\fR\fR
+.ad
+.sp .6
+.RS 4n
+This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBgroupused@\fR\fIgroup\fR\fR
.ad
.sp .6
.RS 4n
-The amount of space referenced in this dataset by the specified group. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information.
+The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information.
.sp
-Unprivileged users can only access the \fBgroupused@\fR... property for groups that they are a member of. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage.
+Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage.
.RE
.sp
@@ -618,7 +667,9 @@ This property is not inherited.
.ad
.sp .6
.RS 4n
-Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher2\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
+Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
+.sp
+Changing this property affects only newly-written data.
.RE
.sp
@@ -629,22 +680,22 @@ Controls the checksum used to verify data integrity. The default value is \fBon\
.ad
.sp .6
.RS 4n
-Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip" compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR" where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
+Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)).
.sp
-This property can also be referred to by its shortened column name "compress".
+This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data.
.RE
.sp
.ne 2
.mk
.na
-\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
+\fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR
.ad
.sp .6
.RS 4n
-Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or \fBraid-z\fR. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations.
+Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations.
.sp
-Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR option.
+Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option.
.RE
.sp
@@ -725,36 +776,36 @@ Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implic
.ad
.sp .6
.RS 4n
-Limits the amount of space referenced by the specified user, which is specified by the \fBuserspace@\fR\fIuser\fR property.
+Limits the amount of space consumed by the specified user. User space consumption is identified by the \fBuserused@\fR\fIuser\fR property.
.sp
-Enforcement of user quotas may be delayed by several seconds. In other words, users may go a bit over their quota before the system notices that they are over quota and begins to refuse additional writes with \fBEDQUOT\fR. See the \fBzfs userspace\fR subcommand for more information.
+Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message. See the \fBzfs userspace\fR subcommand for more information.
.sp
-Unprivileged users can get only their own quota. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota.
+Unprivileged users can only access their own space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota.
.sp
-This property cannot be set on volumes, on filesystems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
+This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms:
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix name\fR (for example, \fBjoe\fR)
+\fIPOSIX name\fR (for example, \fBjoe\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIposix numeric id\fR (for example, \fB789\fR)
+\fIPOSIX numeric ID\fR (for example, \fB789\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid name\fR (for example, \fBjoe.smith@mydomain\fR)
+\fISID name\fR (for example, \fBjoe.smith@mydomain\fR)
.RE
.RS +4
.TP
.ie t \(bu
.el o
-\fIsid numeric id\fR (for example, \fBS-1-123-456-789\fR)
+\fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR)
.RE
.RE
@@ -766,9 +817,9 @@ This property cannot be set on volumes, on filesystems before version 4, or on p
.ad
.sp .6
.RS 4n
-Limits the amount of space referenced by the specified group. See the \fBuserquota@\fR\fIuser\fR property for more information.
+Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBgroupused@\fR\fIgroup\fR property.
.sp
-Unprivileged users can only get the quota of groups they are a member of. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas.
+Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas.
.RE
.sp
@@ -904,7 +955,18 @@ When the \fBsharesmb\fR property is changed for a dataset, the dataset and any c
.RS 4n
Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a \fBsharenfs\fR property of \fBoff\fR is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
.sp
-When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared.
+When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously \fBoff\fR, or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBlogbias\fR = \fBlatency\fR | \fBthroughput\fR\fR
+.ad
+.sp .6
+.RS 4n
+Provide a hint to ZFS about handling of synchronous requests in this dataset. If \fBlogbias\fR is set to \fBlatency\fR (the default), ZFS will use pool log devices (if configured) to handle the requests at low latency. If \fBlogbias\fR is set to \fBthroughput\fR, ZFS will not use configured pool log devices. ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources.
.RE
.sp
@@ -959,7 +1021,7 @@ Controls whether regular files should be scanned for viruses when a file is open
.ne 2
.mk
.na
-\fBxattr=\fBon\fR | \fBoff\fR\fR
+\fB\fBxattr\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
@@ -997,7 +1059,7 @@ The \fBmixed\fR value for the \fBcasesensitivity\fR property indicates that the
.ne 2
.mk
.na
-\fB\fBnormalization\fR=\fBnone\fR | \fBformD\fR | \fBformKCf\fR\fR
+\fB\fBnormalization\fR = \fBnone\fR | \fBformC\fR | \fBformD\fR | \fBformKC\fR | \fBformKD\fR\fR
.ad
.sp .6
.RS 4n
@@ -1008,6 +1070,17 @@ Indicates whether the file system should perform a \fBunicode\fR normalization o
.ne 2
.mk
.na
+\fB\fBjailed\fR=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the dataset is managed from within a jail. The default value is "off".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fButf8only\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
@@ -1081,7 +1154,7 @@ Displays a help message.
.ad
.sp .6
.RS 4n
-Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the "mountpoint" property inherited from the parent.
+Creates a new \fBZFS\fR file system. The file system is automatically mounted according to the \fBmountpoint\fR property inherited from the parent.
.sp
.ne 2
.mk
@@ -1101,7 +1174,7 @@ Creates all the non-existing parent datasets. Datasets created in this manner ar
.ad
.sp .6
.RS 4n
-Sets the specified property as if the command \fBzfs set \fIproperty\fR=\fIvalue\fR\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the command \fBzfs set\fR \fIproperty\fR=\fIvalue\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
.RE
.RE
@@ -1114,7 +1187,7 @@ Sets the specified property as if the command \fBzfs set \fIproperty\fR=\fIvalue
.ad
.sp .6
.RS 4n
-Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fIpath\fR\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created.
+Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fR\fIpath\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created.
.sp
\fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR.
.sp
@@ -1147,7 +1220,7 @@ Creates a sparse volume with no reservation. See \fBvolsize\fR in the Native Pro
.ad
.sp .6
.RS 4n
-Sets the specified property as if the \fBzfs set \fIproperty\fR=\fIvalue\fR\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
+Sets the specified property as if the \fBzfs set\fR \fIproperty\fR=\fIvalue\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options.
.RE
.sp
@@ -1158,7 +1231,7 @@ Sets the specified property as if the \fBzfs set \fIproperty\fR=\fIvalue\fR\fR c
.ad
.sp .6
.RS 4n
-Equivalent to \fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined.
+Equivalent to \fB-o\fR \fBvolblocksize\fR=\fIblocksize\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined.
.RE
.RE
@@ -1167,11 +1240,11 @@ Equivalent to \fB\fR\fB-o\fR \fBvolblocksize=\fIblocksize\fR\fR. If this option
.ne 2
.mk
.na
-\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+\fB\fBzfs destroy\fR [\fB-rRf\fR] \fIfilesystem\fR|\fIvolume\fR\fR
.ad
.sp .6
.RS 4n
-Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children, snapshots, clones).
+Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children or clones).
.sp
.ne 2
.mk
@@ -1180,7 +1253,7 @@ Destroys the given dataset. By default, the command unshares any file systems th
.ad
.sp .6
.RS 4n
-Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
+Recursively destroy all children.
.RE
.sp
@@ -1191,7 +1264,7 @@ Recursively destroy all children. If a snapshot is specified, destroy all snapsh
.ad
.sp .6
.RS 4n
-Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
+Recursively destroy all dependents, including cloned file systems outside the target hierarchy.
.RE
.sp
@@ -1212,6 +1285,52 @@ Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR o
.ne 2
.mk
.na
+\fB\fBzfs destroy\fR [\fB-rRd\fR] \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+The given snapshot is destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed it. Such immediate destruction would occur, for example, if the snapshot had no clones and the user-initiated reference count were zero.
+.sp
+If the snapshot does not qualify for immediate destruction, it is marked for deferred deletion. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-d\fR\fR
+.ad
+.sp .6
+.RS 4n
+Defer snapshot deletion.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Destroy (or mark for deferred deletion) all snapshots with this name in descendent file systems.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively destroy all dependents.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR
.ad
.sp .6
@@ -1251,6 +1370,8 @@ Sets the specified property; see \fBzfs create\fR for details.
.RS 4n
Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option.
.sp
+The \fB-rR\fR options do not recursively destroy the child snapshots of a recursive snapshot. Only the top-level recursive snapshot is destroyed by either of these options. To completely roll back a recursive snapshot, you must roll back the individual child snapshots.
+.sp
.ne 2
.mk
.na
@@ -1380,15 +1501,7 @@ Recursively rename the snapshots of all descendent datasets. Snapshots are the o
.ad
.sp .6
.RS 4n
-Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . The following fields are displayed:
-.sp
-.in +2
-.nf
-name,used,available,referenced,mountpoint
-.fi
-.in -2
-.sp
-
+Lists the property information for the given datasets in tabular form. If specified, you can list property information by the absolute pathname or the relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR). The following fields are displayed: \fBname,used,available,referenced,mountpoint\fR.
.sp
.ne 2
.mk
@@ -1397,7 +1510,7 @@ name,used,available,referenced,mountpoint
.ad
.sp .6
.RS 4n
-Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary whitespace.
+Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space.
.RE
.sp
@@ -1435,34 +1548,25 @@ A comma-separated list of properties to display. The property must be:
.TP
.ie t \(bu
.el o
-one of the properties described in the "Native Properties" section
+One of the properties described in the "Native Properties" section
.RE
.RS +4
.TP
.ie t \(bu
.el o
-a user property
+A user property
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value \fBname\fR to display the dataset name
+The value \fBname\fR to display the dataset name
.RE
.RS +4
.TP
.ie t \(bu
.el o
-the value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for:
-.sp
-.in +2
-.nf
--o name,avail,used,usedsnap,usedds,usedrefreserv,\e
-usedchild -t filesystem,volume
-.fi
-.in -2
-.sp
-
+The value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for specifying \fB-o name,avail,used,usedsnap,usedds,usedrefreserv,usedchild\fR \fB-t filesystem,volume\fR syntax.
.RE
.RE
@@ -1474,7 +1578,7 @@ usedchild -t filesystem,volume
.ad
.sp .6
.RS 4n
-A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
+A property for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
.sp
The following is a list of sorting criteria:
.RS +4
@@ -1535,7 +1639,7 @@ A comma-separated list of types to display, where \fItype\fR is one of \fBfilesy
.ad
.sp .6
.RS 4n
-Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). Properties cannot be set on snapshots.
+Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). User properties can be set on snapshots. For more information, see the "User Properties" section.
.RE
.sp
@@ -1671,11 +1775,11 @@ Displays a list of file systems that are not the most recent version.
.ad
.sp .6
.RS 4n
-Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems can not be accessed on systems running older versions of the software.
+Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software.
.sp
-The file system version is independent of the pool version (see \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command).
+In general, the file system version is independent of the pool version. See \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command.
.sp
-The file system version does not have to be upgraded when the pool version is upgraded, and vice-versa.
+In some cases, the file system version and the pool version are interrelated and the pool version must be upgraded before the file system version can be upgraded.
.sp
.ne 2
.mk
@@ -1772,16 +1876,7 @@ Use exact (parseable) numeric output.
.ad
.sp .6
.RS 4n
-Display only the specified fields, from the following set:
-.sp
-.in +2
-.nf
-type,name,used,quota
-.fi
-.in -2
-.sp
-
-The default is to display all fields.
+Display only the specified fields from the following set: \fBtype,name,used,quota\fR. The default is to display all fields.
.RE
.sp
@@ -1792,15 +1887,7 @@ The default is to display all fields.
.ad
.sp .6
.RS 4n
-Sort output by this field. The \fIs\fR and \fIS\fR flags may be specified multiple times to sort first by one field, then by another. The default is:
-.sp
-.in +2
-.nf
--s type -s name
-.fi
-.in -2
-.sp
-
+Sort output by this field. The \fB-s\fR and \fB-S\fR flags may be specified multiple times to sort first by one field, then by another. The default is \fB-s type\fR \fB-s name\fR.
.RE
.sp
@@ -1822,25 +1909,11 @@ Sort by this field in reverse order. See \fB-s\fR.
.ad
.sp .6
.RS 4n
-Print only the specified types, from the following set:
-.sp
-.in +2
-.nf
-all,posixuser,smbuser,posixgroup,smbgroup
-.fi
-.in -2
+Print only the specified types from the following set, \fBall,posixuser,smbuser,posixgroup,smbgroup\fR.
.sp
-
-The default is:
+The default is \fB-t posixuser,smbuser\fR.
.sp
-.in +2
-.nf
--t posixuser,smbuser
-.fi
-.in -2
-.sp
-
-\&...but can be changed to include group types.
+The default can be changed to include group types.
.RE
.sp
@@ -1851,7 +1924,7 @@ The default is:
.ad
.sp .6
.RS 4n
-Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before a SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity. However, he \fB-i\fR option will report that the POSIX entity has the total usage and quota for both.
+Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before an SMB-to-POSIX name mapping was established. In such a case, some files are owned by the SMB entity and some by the POSIX entity. However, the \fB-i\fR option will report that the POSIX entity has the total usage and quota for both.
.RE
.RE
@@ -1864,11 +1937,11 @@ Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists. N
.ad
.sp .6
.RS 4n
-Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are:
+Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are \fB-t posixgroup,smbgroup\fR.
.sp
.in +2
.nf
--t posixgroup,smbgroup
+-
.fi
.in -2
.sp
@@ -2119,7 +2192,7 @@ If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag
Print verbose information about the stream package generated.
.RE
-The format of the stream is evolving. No backwards compatibility is guaranteed. You may not be able to receive your streams on future versions of \fBZFS\fR.
+The format of the stream is committed. You will be able to receive your streams on future versions of \fBZFS\fR.
.RE
.sp
@@ -2138,6 +2211,8 @@ Creates a snapshot whose contents are as specified in the stream provided on sta
.sp
If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and recreated, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation.
.sp
+When a snapshot replication package stream that is generated by using the \fBzfs send\fR \fB-R\fR command is received, any snapshots that do not exist on the sending location are destroyed by using the \fBzfs destroy\fR \fB-d\fR command.
+.sp
The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
.sp
If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR. If the \fB-d\fR option is specified, the snapshot name is determined by appending the sent snapshot's name to the specified \fIfilesystem\fR. If the \fB-d\fR option is specified, any required file systems within the specified one are created.
@@ -2241,7 +2316,7 @@ Specifies to whom the permissions are delegated. Multiple entities can be specif
.ad
.sp .6
.RS 4n
-Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an "at sign" ("@") , may be specified. See the \fB-s\fR form below for details.
+Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an at sign (\fB@\fR), may be specified. See the \fB-s\fR form below for details.
.RE
.sp
@@ -2263,67 +2338,63 @@ Permissions are generally the ability to use a \fBZFS\fR subcommand or change a
.sp
.in +2
.nf
-NAME TYPE NOTES
-allow subcommand Must also have the permission
- that is being allowed.
-clone subcommand Must also have the 'create' ability
- and the 'mount' ability in the origin
- file system.
-create subcommand Must also have the 'mount' ability.
-destroy subcommand Must also have the 'mount' ability.
-mount subcommand Allows mount, unmount, and
- create/remove zvol device links.
-promote subcommand Must also have the 'mount' ability and
- 'promote' ability in the origin file system.
-receive subcommand Must also have the 'mount' ability and
- the 'create' ability.
-rename subcommand Must also have the 'mount' ability and
- the 'create' ability in the new parent.
-rollback subcommand Must also have the 'mount' ability.
-snapshot subcommand Must also have the 'mount' ability.
-share subcommand Allows share and unshare.
-send subcommand
-
-
-aclinherit property
-aclmode property
-atime property
-canmount property
-casesensitivity property
-checksum property
-compression property
-copies property
-devices property
-exec property
-groupquota other Allows accessing any groupquota@... property.
-groupused other Allows reading any groupused@... property.
-mountpoint property
-nbmand property
-normalization property
-primarycache property
-quota property
-readonly property
-recordsize property
-refquota property
-refreservation property
-reservation property
-secondarycache property
-setuid property
-shareiscsi property
-sharenfs property
-sharesmb property
-snapdir property
-utf8only property
-userprop other Allows changing any user property.
-userquota other Allows accessing any userquota@... property.
-userused other Allows reading any userused@... property.
-version property
-volblocksize property
-volsize property
-vscan property
-xattr property
-zoned property
-userprop other Allows changing any user property.
+NAME TYPE NOTES
+allow subcommand Must also have the permission that is being
+ allowed
+clone subcommand Must also have the 'create' ability and 'mount'
+ ability in the origin file system
+create subcommand Must also have the 'mount' ability
+destroy subcommand Must also have the 'mount' ability
+mount subcommand Allows mount/umount of ZFS datasets
+promote subcommand Must also have the 'mount'
+ and 'promote' ability in the origin file system
+receive subcommand Must also have the 'mount' and 'create' ability
+rename subcommand Must also have the 'mount' and 'create'
+ ability in the new parent
+rollback subcommand Must also have the 'mount' ability
+send subcommand
+share subcommand Allows sharing file systems over NFS or SMB
+ protocols
+snapshot subcommand Must also have the 'mount' ability
+groupquota other Allows accessing any groupquota@... property
+groupused other Allows reading any groupused@... property
+userprop other Allows changing any user property
+userquota other Allows accessing any userquota@... property
+userused other Allows reading any userused@... property
+
+aclinherit property
+aclmode property
+atime property
+canmount property
+casesensitivity property
+checksum property
+compression property
+copies property
+devices property
+exec property
+mountpoint property
+nbmand property
+normalization property
+primarycache property
+quota property
+readonly property
+recordsize property
+refquota property
+refreservation property
+reservation property
+secondarycache property
+setuid property
+shareiscsi property
+sharenfs property
+sharesmb property
+snapdir property
+utf8only property
+version property
+volblocksize property
+volsize property
+vscan property
+xattr property
+zoned property
.fi
.in -2
.sp
@@ -2343,7 +2414,7 @@ Sets "create time" permissions. These permissions are granted (locally) to the c
.ne 2
.mk
.na
-\fB\fBzfs allow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+\fB\fBzfs allow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
.ad
.sp .6
.RS 4n
@@ -2388,7 +2459,7 @@ Recursively remove the permissions from this file system and all descendents.
.ne 2
.mk
.na
-\fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,...]]\fR
+\fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,...]]\fR
.ad
.br
.na
@@ -2399,12 +2470,101 @@ Recursively remove the permissions from this file system and all descendents.
Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, thus removing the set entirely.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs hold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Adds a single reference, named with the \fItag\fR argument, to the specified snapshot or snapshots. Each snapshot has its own tag namespace, and tags must be unique within that space.
+.sp
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies that a hold with the given tag is applied recursively to the snapshots of all descendent file systems.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs holds\fR [\fB-r\fR] \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Lists all existing user references for the given snapshot or snapshots.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Lists the holds that are set on the named descendent snapshots, in addition to listing the holds on the named snapshot.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs release\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR
+.ad
+.sp .6
+.RS 4n
+Removes a single reference, named with the \fItag\fR argument, from the specified snapshot or snapshots. The tag must already exist for each snapshot.
+.sp
+If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively releases a hold with the given tag on the snapshots of all descendent file systems.
+.RE
+
+.RE
+
+\fB\fBzfs jail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Attaches the given file system to the given jail. From now on this file system tree can be managed from within a jail if the \fBjailed\fR property has been set.
+To use this functionality, sysctl \fBsecurity.jail.enforce_statfs\fR should be set to 0 and sysctl \fBsecurity.jail.mount_allowed\fR should be set to 1.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unjail\fR \fIjailid\fR \fIfilesystem\fR\fR
+.ad
+.sp .6
+.RS 4n
+Detaches the given file system from the given jail.
+.RE
+
.SH EXAMPLES
.LP
\fBExample 1 \fRCreating a ZFS File System Hierarchy
.sp
.LP
-The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and automatically inherited by the child file system.
+The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and is automatically inherited by the child file system.
.sp
.in +2
@@ -2431,7 +2591,7 @@ The following command creates a snapshot named \fByesterday\fR. This snapshot is
.sp
.LP
-\fBExample 3 \fRTaking and Destroying Multiple Snapshots
+\fBExample 3 \fRCreating and Destroying Multiple Snapshots
.sp
.LP
The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR and all of its descendent file systems. Each snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of its file system. The second command destroys the newly created snapshots.
@@ -2446,10 +2606,10 @@ The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR
.sp
.LP
-\fBExample 4 \fRTurning Off Compression
+\fBExample 4 \fRDisabling and Enabling File System Compression
.sp
.LP
-The following commands turn compression off for all file systems under \fBpool/home\fR, but explicitly turns it on for \fBpool/home/anne\fR.
+The following command disables the \fBcompression\fR property for all file systems under \fBpool/home\fR. The next command explicitly enables \fBcompression\fR for \fBpool/home/anne\fR.
.sp
.in +2
@@ -2464,14 +2624,12 @@ The following commands turn compression off for all file systems under \fBpool/h
\fBExample 5 \fRListing ZFS Datasets
.sp
.LP
-The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR) . See \fBzpool\fR(1M) for more information on pool properties.
+The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR. The default is \fBoff\fR. See \fBzpool\fR(1M) for more information on pool properties.
.sp
.in +2
.nf
# \fBzfs list\fR
-
-
NAME USED AVAIL REFER MOUNTPOINT
pool 450K 457G 18K /pool
pool/home 315K 457G 21K /export/home
@@ -2505,25 +2663,21 @@ The following command lists all properties for \fBpool/home/bob\fR.
.in +2
.nf
# \fBzfs get all pool/home/bob\fR
-
-
NAME PROPERTY VALUE SOURCE
pool/home/bob type filesystem -
-pool/home/bob creation Thu Jul 12 14:44 2007 -
-pool/home/bob used 276K -
-pool/home/bob available 50.0G -
-pool/home/bob referenced 276K -
+pool/home/bob creation Tue Jul 21 15:53 2009 -
+pool/home/bob used 21K -
+pool/home/bob available 20.0G -
+pool/home/bob referenced 21K -
pool/home/bob compressratio 1.00x -
pool/home/bob mounted yes -
-pool/home/bob quota 50G local
+pool/home/bob quota 20G local
pool/home/bob reservation none default
pool/home/bob recordsize 128K default
-pool/home/bob mountpoint /export/home/bob inherited
- from
- pool/home
+pool/home/bob mountpoint /pool/home/bob default
pool/home/bob sharenfs off default
pool/home/bob checksum on default
-pool/home/bob compression off default
+pool/home/bob compression on local
pool/home/bob atime on default
pool/home/bob devices on default
pool/home/bob exec on default
@@ -2537,22 +2691,21 @@ pool/home/bob canmount on default
pool/home/bob shareiscsi off default
pool/home/bob xattr on default
pool/home/bob copies 1 default
-pool/home/bob version 1 -
+pool/home/bob version 4 -
pool/home/bob utf8only off -
pool/home/bob normalization none -
pool/home/bob casesensitivity sensitive -
pool/home/bob vscan off default
pool/home/bob nbmand off default
pool/home/bob sharesmb off default
-pool/home/bob refquota 10M local
+pool/home/bob refquota none default
pool/home/bob refreservation none default
pool/home/bob primarycache all default
-pool/home/bob secondarycache a default
+pool/home/bob secondarycache all default
pool/home/bob usedbysnapshots 0 -
-pool/home/bob usedbydataset 18K -
+pool/home/bob usedbydataset 21K -
pool/home/bob usedbychildren 0 -
pool/home/bob usedbyrefreservation 0 -
-
.fi
.in -2
.sp
@@ -2578,10 +2731,9 @@ The following command lists all properties with local settings for \fBpool/home/
.in +2
.nf
# \fBzfs get -r -s local -o name,property,value all pool/home/bob\fR
-
- NAME PROPERTY VALUE
- pool compression on
- pool/home checksum off
+NAME PROPERTY VALUE
+pool/home/bob quota 20G
+pool/home/bob compression on
.fi
.in -2
.sp
@@ -2669,7 +2821,7 @@ The following commands send a full stream and then an incremental stream to a re
.sp
.LP
-\fBExample 13 \fRUsing the \fBreceive\fR \fB-d\fR Option
+\fBExample 13 \fRUsing the \fBzfs receive\fR \fB-d\fR Option
.sp
.LP
The following command sends a full stream of \fBpoolA/fsA/fsB@snap\fR to a remote machine, receiving it into \fBpoolB/received/fsA/fsB@snap\fR. The \fBfsA/fsB@snap\fR portion of the received snapshot's name is determined from the name of the sent snapshot. \fBpoolB\fR must contain the file system \fBpoolB/received\fR. If \fBpoolB/received/fsA\fR does not exist, it is created as an empty file system.
@@ -2752,7 +2904,6 @@ The following commands show how to set \fBsharenfs\fR property options to enable
.in +2
.nf
# \fB# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR
-
.fi
.in -2
.sp
@@ -2770,13 +2921,12 @@ The following example shows how to set permissions so that user \fBcindys\fR can
.sp
.in +2
.nf
-# \fB# zfs allow cindys create,destroy,mount,snapshot tank/cindys\fR
+# \fBzfs allow cindys create,destroy,mount,snapshot tank/cindys\fR
# \fBzfs allow tank/cindys\fR
-------------------------------------------------------------
Local+Descendent permissions on (tank/cindys)
user cindys create,destroy,mount,snapshot
-------------------------------------------------------------
-
.fi
.in -2
.sp
@@ -2853,8 +3003,8 @@ The following example shows to grant the ability to set quotas and reservations
Local+Descendent permissions on (users/home)
user cindys quota,reservation
-------------------------------------------------------------
-cindys% zfs set quota=10G users/home/marks
-cindys% zfs get quota users/home/marks
+cindys% \fBzfs set quota=10G users/home/marks\fR
+cindys% \fBzfs get quota users/home/marks\fR
NAME PROPERTY VALUE SOURCE
users/home/marks quota 10G local
.fi
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
index ca5c2b2..e2ab90e 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libintl.h>
@@ -107,7 +106,8 @@ zfs_callback(zfs_handle_t *zhp, void *data)
zfs_prune_proplist(zhp,
cb->cb_props_table);
- if (zfs_expand_proplist(zhp, cb->cb_proplist)
+ if (zfs_expand_proplist(zhp, cb->cb_proplist,
+ (cb->cb_flags & ZFS_ITER_RECVD_PROPS))
!= 0) {
free(node);
return (-1);
@@ -350,11 +350,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
- if (avl_pool == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
+ if (avl_pool == NULL)
+ nomem();
cb.cb_sortcol = sortcol;
cb.cb_flags = flags;
@@ -362,7 +359,7 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
cb.cb_types = types;
cb.cb_depth_limit = limit;
/*
- * If cb_proplist is provided then in the zfs_handles created we
+ * If cb_proplist is provided then in the zfs_handles created we
* retain only those properties listed in cb_proplist and sortcol.
* The rest are pruned. So, the caller should make sure that no other
* properties other than those listed in cb_proplist/sortcol are
@@ -399,11 +396,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
sizeof (cb.cb_props_table));
}
- if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory\n"));
- exit(1);
- }
+ if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
+ nomem();
if (argc == 0) {
/*
@@ -453,11 +447,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
/*
* Finally, clean up the AVL tree.
*/
- if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
+ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
+ nomem();
while ((node = uu_avl_walk_next(walk)) != NULL) {
uu_avl_remove(cb.cb_avl, node);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
index a029077..8c6b9fd 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
@@ -42,6 +42,7 @@ typedef struct zfs_sort_column {
#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
#define ZFS_ITER_DEPTH_LIMIT (1 << 3)
+#define ZFS_ITER_RECVD_PROPS (1 << 4)
int zfs_for_each(int, char **, int options, zfs_type_t,
zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
index c5e9b64..8383dbc 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -20,8 +20,8 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#include <assert.h>
@@ -41,23 +41,33 @@
#include <zone.h>
#include <grp.h>
#include <pwd.h>
+#include <signal.h>
+#include <sys/list.h>
#include <sys/mntent.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/fs/zfs.h>
+#include <sys/types.h>
+#include <time.h>
#include <libzfs.h>
+#include <zfs_prop.h>
+#include <zfs_deleg.h>
#include <libuutil.h>
+#ifdef sun
+#include <aclutils.h>
+#include <directory.h>
+#endif
#include "zfs_iter.h"
#include "zfs_util.h"
+#include "zfs_comutil.h"
libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
static char history_str[HIS_MAX_RECORD_LEN];
-const char *pypath = "/usr/lib/zfs/pyzfs.py";
static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
@@ -78,7 +88,12 @@ static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
static int zfs_do_promote(int argc, char **argv);
static int zfs_do_userspace(int argc, char **argv);
-static int zfs_do_python(int argc, char **argv);
+static int zfs_do_allow(int argc, char **argv);
+static int zfs_do_unallow(int argc, char **argv);
+static int zfs_do_hold(int argc, char **argv);
+static int zfs_do_holds(int argc, char **argv);
+static int zfs_do_release(int argc, char **argv);
+static int zfs_do_diff(int argc, char **argv);
static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
@@ -124,7 +139,11 @@ typedef enum {
HELP_ALLOW,
HELP_UNALLOW,
HELP_USERSPACE,
- HELP_GROUPSPACE
+ HELP_GROUPSPACE,
+ HELP_HOLD,
+ HELP_HOLDS,
+ HELP_RELEASE,
+ HELP_DIFF
} zfs_help_t;
typedef struct zfs_command {
@@ -155,7 +174,7 @@ static zfs_command_t command_table[] = {
{ "list", zfs_do_list, HELP_LIST },
{ NULL },
{ "set", zfs_do_set, HELP_SET },
- { "get", zfs_do_get, HELP_GET },
+ { "get", zfs_do_get, HELP_GET },
{ "inherit", zfs_do_inherit, HELP_INHERIT },
{ "upgrade", zfs_do_upgrade, HELP_UPGRADE },
{ "userspace", zfs_do_userspace, HELP_USERSPACE },
@@ -169,9 +188,14 @@ static zfs_command_t command_table[] = {
{ "send", zfs_do_send, HELP_SEND },
{ "receive", zfs_do_receive, HELP_RECEIVE },
{ NULL },
- { "allow", zfs_do_python, HELP_ALLOW },
+ { "allow", zfs_do_allow, HELP_ALLOW },
+ { NULL },
+ { "unallow", zfs_do_unallow, HELP_UNALLOW },
{ NULL },
- { "unallow", zfs_do_python, HELP_UNALLOW },
+ { "hold", zfs_do_hold, HELP_HOLD },
+ { "holds", zfs_do_holds, HELP_HOLDS },
+ { "release", zfs_do_release, HELP_RELEASE },
+ { "diff", zfs_do_diff, HELP_DIFF },
{ NULL },
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
@@ -194,15 +218,15 @@ get_usage(zfs_help_t idx)
"\tcreate [-ps] [-b blocksize] [-o property=value] ... "
"-V <size> <volume>\n"));
case HELP_DESTROY:
- return (gettext("\tdestroy [-rRf] "
- "<filesystem|volume|snapshot>\n"));
+ return (gettext("\tdestroy [-rRf] <filesystem|volume>\n"
+ "\tdestroy [-rRd] <snapshot>\n"));
case HELP_GET:
return (gettext("\tget [-rHp] [-d max] "
- "[-o field[,...]] [-s source[,...]]\n"
+ "[-o \"all\" | field[,...]] [-s source[,...]]\n"
"\t <\"all\" | property[,...]> "
"[filesystem|volume|snapshot] ...\n"));
case HELP_INHERIT:
- return (gettext("\tinherit [-r] <property> "
+ return (gettext("\tinherit [-rS] <property> "
"<filesystem|volume|snapshot> ...\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade [-v]\n"
@@ -222,9 +246,9 @@ get_usage(zfs_help_t idx)
case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
- return (gettext("\treceive [-vnF] <filesystem|volume|"
+ return (gettext("\treceive [-vnFu] <filesystem|volume|"
"snapshot>\n"
- "\treceive [-vnF] -d <filesystem>\n"));
+ "\treceive [-vnFu] [-d | -e] <filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
@@ -233,7 +257,7 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
- return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n"));
+ return (gettext("\tsend [-RDp] [-[iI] snapshot] <snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
"<filesystem|volume|snapshot> ...\n"));
@@ -246,10 +270,11 @@ get_usage(zfs_help_t idx)
return (gettext("\tunmount [-f] "
"<-a | filesystem|mountpoint>\n"));
case HELP_UNSHARE:
- return (gettext("\tunshare [-f] "
+ return (gettext("\tunshare "
"<-a | filesystem|mountpoint>\n"));
case HELP_ALLOW:
- return (gettext("\tallow [-ldug] "
+ return (gettext("\tallow <filesystem|volume>\n"
+ "\tallow [-ldug] "
"<\"everyone\"|user|group>[,...] <perm|@setname>[,...]\n"
"\t <filesystem|volume>\n"
"\tallow [-ld] -e <perm|@setname>[,...] "
@@ -275,28 +300,54 @@ get_usage(zfs_help_t idx)
return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
"[-sS field] ... [-t type[,...]]\n"
"\t <filesystem|snapshot>\n"));
+ case HELP_HOLD:
+ return (gettext("\thold [-r] <tag> <snapshot> ...\n"));
+ case HELP_HOLDS:
+ return (gettext("\tholds [-r] <snapshot> ...\n"));
+ case HELP_RELEASE:
+ return (gettext("\trelease [-r] <tag> <snapshot> ...\n"));
+ case HELP_DIFF:
+ return (gettext("\tdiff [-FHt] <snapshot> "
+ "[snapshot|filesystem]\n"));
}
abort();
/* NOTREACHED */
}
+void
+nomem(void)
+{
+ (void) fprintf(stderr, gettext("internal error: out of memory\n"));
+ exit(1);
+}
+
/*
* Utility function to guarantee malloc() success.
*/
+
void *
safe_malloc(size_t size)
{
void *data;
- if ((data = calloc(1, size)) == NULL) {
- (void) fprintf(stderr, "internal error: out of memory\n");
- exit(1);
- }
+ if ((data = calloc(1, size)) == NULL)
+ nomem();
return (data);
}
+static char *
+safe_strdup(char *str)
+{
+ char *dupstr = strdup(str);
+
+ if (dupstr == NULL)
+ nomem();
+
+ return (dupstr);
+}
+
/*
* Callback routine that will print out information for each of
* the properties.
@@ -435,11 +486,8 @@ parseprop(nvlist_t *props)
"specified multiple times\n"), propname);
return (-1);
}
- if (nvlist_add_string(props, propname, propval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- return (-1);
- }
+ if (nvlist_add_string(props, propname, propval) != 0)
+ nomem();
return (0);
}
@@ -464,6 +512,59 @@ parse_depth(char *opt, int *flags)
return (depth);
}
+#define PROGRESS_DELAY 2 /* seconds */
+
+static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+static time_t pt_begin;
+static char *pt_header = NULL;
+static boolean_t pt_shown;
+
+static void
+start_progress_timer(void)
+{
+ pt_begin = time(NULL) + PROGRESS_DELAY;
+ pt_shown = B_FALSE;
+}
+
+static void
+set_progress_header(char *header)
+{
+ assert(pt_header == NULL);
+ pt_header = safe_strdup(header);
+ if (pt_shown) {
+ (void) printf("%s: ", header);
+ (void) fflush(stdout);
+ }
+}
+
+static void
+update_progress(char *update)
+{
+ if (!pt_shown && time(NULL) > pt_begin) {
+ int len = strlen(update);
+
+ (void) printf("%s: %s%*.*s", pt_header, update, len, len,
+ pt_reverse);
+ (void) fflush(stdout);
+ pt_shown = B_TRUE;
+ } else if (pt_shown) {
+ int len = strlen(update);
+
+ (void) printf("%s%*.*s", update, len, len, pt_reverse);
+ (void) fflush(stdout);
+ }
+}
+
+static void
+finish_progress(char *done)
+{
+ if (pt_shown) {
+ (void) printf("%s\n", done);
+ (void) fflush(stdout);
+ }
+ free(pt_header);
+ pt_header = NULL;
+}
/*
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
*
@@ -483,11 +584,8 @@ zfs_do_clone(int argc, char **argv)
int ret;
int c;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, "o:p")) != -1) {
@@ -552,8 +650,9 @@ zfs_do_clone(int argc, char **argv)
clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET);
if (clone != NULL) {
- if ((ret = zfs_mount(clone, NULL, 0)) == 0)
- ret = zfs_share(clone);
+ if (zfs_get_type(clone) != ZFS_TYPE_VOLUME)
+ if ((ret = zfs_mount(clone, NULL, 0)) == 0)
+ ret = zfs_share(clone);
zfs_close(clone);
}
}
@@ -599,13 +698,10 @@ zfs_do_create(int argc, char **argv)
int ret = 1;
nvlist_t *props;
uint64_t intval;
- int canmount;
+ int canmount = ZFS_CANMOUNT_OFF;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) {
@@ -620,12 +716,8 @@ zfs_do_create(int argc, char **argv)
}
if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0)
+ nomem();
volsize = intval;
break;
case 'p':
@@ -642,11 +734,8 @@ zfs_do_create(int argc, char **argv)
if (nvlist_add_uint64(props,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
- intval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- goto error;
- }
+ intval) != 0)
+ nomem();
break;
case 'o':
if (parseprop(props))
@@ -708,15 +797,14 @@ zfs_do_create(int argc, char **argv)
resv_prop = ZFS_PROP_REFRESERVATION;
else
resv_prop = ZFS_PROP_RESERVATION;
+ volsize = zvol_volsize_to_reservation(volsize, props);
if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop),
&strval) != 0) {
if (nvlist_add_uint64(props,
zfs_prop_to_name(resv_prop), volsize) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
nvlist_free(props);
- return (1);
+ nomem();
}
}
}
@@ -741,19 +829,20 @@ zfs_do_create(int argc, char **argv)
if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
goto error;
+
+ ret = 0;
/*
* if the user doesn't want the dataset automatically mounted,
* then skip the mount/share step
*/
-
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
+ if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type))
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
/*
* Mount and/or share the new filesystem as appropriate. We provide a
* verbose error message to let the user know that their filesystem was
* in fact created, even if we failed to mount or share it.
*/
- ret = 0;
if (canmount == ZFS_CANMOUNT_ON) {
if (zfs_mount(zhp, NULL, 0) != 0) {
(void) fprintf(stderr, gettext("filesystem "
@@ -778,11 +867,13 @@ badusage:
}
/*
- * zfs destroy [-rf] <fs, snap, vol>
+ * zfs destroy [-rRf] <fs, vol>
+ * zfs destroy [-rRd] <snap>
*
- * -r Recursively destroy all children
- * -R Recursively destroy all dependents, including clones
- * -f Force unmounting of any dependents
+ * -r Recursively destroy all children
+ * -R Recursively destroy all dependents, including clones
+ * -f Force unmounting of any dependents
+ * -d If we can't destroy now, mark for deferred destruction
*
* Destroys the given dataset. By default, it will unmount any filesystems,
* and refuse to destroy a dataset that has any dependents. A dependent can
@@ -798,6 +889,7 @@ typedef struct destroy_cbdata {
boolean_t cb_closezhp;
zfs_handle_t *cb_target;
char *cb_snapname;
+ boolean_t cb_defer_destroy;
} destroy_cbdata_t;
/*
@@ -866,7 +958,7 @@ destroy_callback(zfs_handle_t *zhp, void *data)
/*
* Ignore pools (which we've already flagged as an error before getting
- * here.
+ * here).
*/
if (strchr(zfs_get_name(zhp), '/') == NULL &&
zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
@@ -878,7 +970,7 @@ destroy_callback(zfs_handle_t *zhp, void *data)
* Bail out on the first error.
*/
if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
- zfs_destroy(zhp) != 0) {
+ zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) {
zfs_close(zhp);
return (-1);
}
@@ -930,10 +1022,15 @@ zfs_do_destroy(int argc, char **argv)
int c;
zfs_handle_t *zhp;
char *cp;
+ zfs_type_t type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, "frR")) != -1) {
+ while ((c = getopt(argc, argv, "dfrR")) != -1) {
switch (c) {
+ case 'd':
+ cb.cb_defer_destroy = B_TRUE;
+ type = ZFS_TYPE_SNAPSHOT;
+ break;
case 'f':
cb.cb_force = 1;
break;
@@ -979,14 +1076,22 @@ zfs_do_destroy(int argc, char **argv)
cp++;
if (cb.cb_doclones) {
+ boolean_t defer = cb.cb_defer_destroy;
+
+ /*
+ * Temporarily ignore the defer_destroy setting since
+ * it's not supported for clones.
+ */
+ cb.cb_defer_destroy = B_FALSE;
cb.cb_snapname = cp;
if (destroy_snap_clones(zhp, &cb) != 0) {
zfs_close(zhp);
return (1);
}
+ cb.cb_defer_destroy = defer;
}
- ret = zfs_destroy_snaps(zhp, cp);
+ ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy);
zfs_close(zhp);
if (ret) {
(void) fprintf(stderr,
@@ -995,9 +1100,8 @@ zfs_do_destroy(int argc, char **argv)
return (ret != 0);
}
-
/* Open the given dataset */
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
return (1);
cb.cb_target = zhp;
@@ -1023,15 +1127,15 @@ zfs_do_destroy(int argc, char **argv)
* Check for any dependents and/or clones.
*/
cb.cb_first = B_TRUE;
- if (!cb.cb_doclones &&
+ if (!cb.cb_doclones && !cb.cb_defer_destroy &&
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
&cb) != 0) {
zfs_close(zhp);
return (1);
}
- if (cb.cb_error ||
- zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
+ if (cb.cb_error || (!cb.cb_defer_destroy &&
+ (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) {
zfs_close(zhp);
return (1);
}
@@ -1044,22 +1148,35 @@ zfs_do_destroy(int argc, char **argv)
if (destroy_callback(zhp, &cb) != 0)
return (1);
-
return (0);
}
+static boolean_t
+is_recvd_column(zprop_get_cbdata_t *cbp)
+{
+ int i;
+ zfs_get_column_t col;
+
+ for (i = 0; i < ZFS_GET_NCOLS &&
+ (col = cbp->cb_columns[i]) != GET_COL_NONE; i++)
+ if (col == GET_COL_RECVD)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
/*
- * zfs get [-rHp] [-o field[,field]...] [-s source[,source]...]
- * < all | property[,property]... > < fs | snap | vol > ...
+ * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...]
+ * < all | property[,property]... > < fs | snap | vol > ...
*
* -r recurse over any child datasets
* -H scripted mode. Headers are stripped, and fields are separated
* by tabs instead of spaces.
- * -o Set of fields to display. One of "name,property,value,source".
- * Default is all four.
+ * -o Set of fields to display. One or more of "name,property,value,
+ * received,source". Default is "name,property,value,source".
+ * "all" is an alias for all five.
* -s Set of sources to allow. One of
- * "local,default,inherited,temporary,none". Default is all
- * five.
+ * "local,default,inherited,received,temporary,none". Default is
+ * all six.
* -p Display values in parsable (literal) format.
*
* Prints properties for the given datasets. The user can control which
@@ -1073,16 +1190,19 @@ static int
get_callback(zfs_handle_t *zhp, void *data)
{
char buf[ZFS_MAXPROPLEN];
+ char rbuf[ZFS_MAXPROPLEN];
zprop_source_t sourcetype;
char source[ZFS_MAXNAMELEN];
zprop_get_cbdata_t *cbp = data;
- nvlist_t *userprop = zfs_get_user_props(zhp);
+ nvlist_t *user_props = zfs_get_user_props(zhp);
zprop_list_t *pl = cbp->cb_proplist;
nvlist_t *propval;
char *strval;
char *sourceval;
+ boolean_t received = is_recvd_column(cbp);
for (; pl != NULL; pl = pl->pl_next) {
+ char *recvdval = NULL;
/*
* Skip the special fake placeholder. This will also skip over
* the name property when 'all' is specified.
@@ -1109,9 +1229,14 @@ get_callback(zfs_handle_t *zhp, void *data)
(void) strlcpy(buf, "-", sizeof (buf));
}
+ if (received && (zfs_prop_get_recvd(zhp,
+ zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf),
+ cbp->cb_literal) == 0))
+ recvdval = rbuf;
+
zprop_print_one_property(zfs_get_name(zhp), cbp,
zfs_prop_to_name(pl->pl_prop),
- buf, sourcetype, source);
+ buf, sourcetype, source, recvdval);
} else if (zfs_prop_userquota(pl->pl_user_prop)) {
sourcetype = ZPROP_SRC_LOCAL;
@@ -1122,9 +1247,9 @@ get_callback(zfs_handle_t *zhp, void *data)
}
zprop_print_one_property(zfs_get_name(zhp), cbp,
- pl->pl_user_prop, buf, sourcetype, source);
+ pl->pl_user_prop, buf, sourcetype, source, NULL);
} else {
- if (nvlist_lookup_nvlist(userprop,
+ if (nvlist_lookup_nvlist(user_props,
pl->pl_user_prop, &propval) != 0) {
if (pl->pl_all)
continue;
@@ -1139,6 +1264,9 @@ get_callback(zfs_handle_t *zhp, void *data)
if (strcmp(sourceval,
zfs_get_name(zhp)) == 0) {
sourcetype = ZPROP_SRC_LOCAL;
+ } else if (strcmp(sourceval,
+ ZPROP_SOURCE_VAL_RECVD) == 0) {
+ sourcetype = ZPROP_SRC_RECEIVED;
} else {
sourcetype = ZPROP_SRC_INHERITED;
(void) strlcpy(source,
@@ -1146,9 +1274,14 @@ get_callback(zfs_handle_t *zhp, void *data)
}
}
+ if (received && (zfs_prop_get_recvd(zhp,
+ pl->pl_user_prop, rbuf, sizeof (rbuf),
+ cbp->cb_literal) == 0))
+ recvdval = rbuf;
+
zprop_print_one_property(zfs_get_name(zhp), cbp,
pl->pl_user_prop, strval, sourcetype,
- source);
+ source, recvdval);
}
}
@@ -1204,10 +1337,10 @@ zfs_do_get(int argc, char **argv)
i = 0;
while (*optarg != '\0') {
static char *col_subopts[] =
- { "name", "property", "value", "source",
- NULL };
+ { "name", "property", "value", "received",
+ "source", "all", NULL };
- if (i == 4) {
+ if (i == ZFS_GET_NCOLS) {
(void) fprintf(stderr, gettext("too "
"many fields given to -o "
"option\n"));
@@ -1226,8 +1359,28 @@ zfs_do_get(int argc, char **argv)
cb.cb_columns[i++] = GET_COL_VALUE;
break;
case 3:
+ cb.cb_columns[i++] = GET_COL_RECVD;
+ flags |= ZFS_ITER_RECVD_PROPS;
+ break;
+ case 4:
cb.cb_columns[i++] = GET_COL_SOURCE;
break;
+ case 5:
+ if (i > 0) {
+ (void) fprintf(stderr,
+ gettext("\"all\" conflicts "
+ "with specific fields "
+ "given to -o option\n"));
+ usage(B_FALSE);
+ }
+ cb.cb_columns[0] = GET_COL_NAME;
+ cb.cb_columns[1] = GET_COL_PROPERTY;
+ cb.cb_columns[2] = GET_COL_VALUE;
+ cb.cb_columns[3] = GET_COL_RECVD;
+ cb.cb_columns[4] = GET_COL_SOURCE;
+ flags |= ZFS_ITER_RECVD_PROPS;
+ i = ZFS_GET_NCOLS;
+ break;
default:
(void) fprintf(stderr,
gettext("invalid column name "
@@ -1242,7 +1395,8 @@ zfs_do_get(int argc, char **argv)
while (*optarg != '\0') {
static char *source_subopts[] = {
"local", "default", "inherited",
- "temporary", "none", NULL };
+ "received", "temporary", "none",
+ NULL };
switch (getsubopt(&optarg, source_subopts,
&value)) {
@@ -1256,9 +1410,12 @@ zfs_do_get(int argc, char **argv)
cb.cb_sources |= ZPROP_SRC_INHERITED;
break;
case 3:
- cb.cb_sources |= ZPROP_SRC_TEMPORARY;
+ cb.cb_sources |= ZPROP_SRC_RECEIVED;
break;
case 4:
+ cb.cb_sources |= ZPROP_SRC_TEMPORARY;
+ break;
+ case 5:
cb.cb_sources |= ZPROP_SRC_NONE;
break;
default:
@@ -1325,9 +1482,10 @@ zfs_do_get(int argc, char **argv)
}
/*
- * inherit [-r] <property> <fs|vol> ...
+ * inherit [-rS] <property> <fs|vol> ...
*
- * -r Recurse over all children
+ * -r Recurse over all children
+ * -S Revert to received value, if any
*
* For each dataset specified on the command line, inherit the given property
* from its parent. Inheriting a property at the pool level will cause it to
@@ -1336,11 +1494,16 @@ zfs_do_get(int argc, char **argv)
* local modifications for each dataset.
*/
+typedef struct inherit_cbdata {
+ const char *cb_propname;
+ boolean_t cb_received;
+} inherit_cbdata_t;
+
static int
inherit_recurse_cb(zfs_handle_t *zhp, void *data)
{
- char *propname = data;
- zfs_prop_t prop = zfs_name_to_prop(propname);
+ inherit_cbdata_t *cb = data;
+ zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname);
/*
* If we're doing it recursively, then ignore properties that
@@ -1350,15 +1513,15 @@ inherit_recurse_cb(zfs_handle_t *zhp, void *data)
!zfs_prop_valid_for_type(prop, zfs_get_type(zhp)))
return (0);
- return (zfs_prop_inherit(zhp, propname) != 0);
+ return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0);
}
static int
inherit_cb(zfs_handle_t *zhp, void *data)
{
- char *propname = data;
+ inherit_cbdata_t *cb = data;
- return (zfs_prop_inherit(zhp, propname) != 0);
+ return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0);
}
static int
@@ -1366,16 +1529,21 @@ zfs_do_inherit(int argc, char **argv)
{
int c;
zfs_prop_t prop;
+ inherit_cbdata_t cb = { 0 };
char *propname;
int ret;
int flags = 0;
+ boolean_t received = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, "r")) != -1) {
+ while ((c = getopt(argc, argv, "rS")) != -1) {
switch (c) {
case 'r':
flags |= ZFS_ITER_RECURSE;
break;
+ case 'S':
+ received = B_TRUE;
+ break;
case '?':
default:
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -1408,7 +1576,7 @@ zfs_do_inherit(int argc, char **argv)
propname);
return (1);
}
- if (!zfs_prop_inheritable(prop)) {
+ if (!zfs_prop_inheritable(prop) && !received) {
(void) fprintf(stderr, gettext("'%s' property cannot "
"be inherited\n"), propname);
if (prop == ZFS_PROP_QUOTA ||
@@ -1419,18 +1587,27 @@ zfs_do_inherit(int argc, char **argv)
"%s=none' to clear\n"), propname);
return (1);
}
+ if (received && (prop == ZFS_PROP_VOLSIZE ||
+ prop == ZFS_PROP_VERSION)) {
+ (void) fprintf(stderr, gettext("'%s' property cannot "
+ "be reverted to a received value\n"), propname);
+ return (1);
+ }
} else if (!zfs_prop_user(propname)) {
(void) fprintf(stderr, gettext("invalid property '%s'\n"),
propname);
usage(B_FALSE);
}
+ cb.cb_propname = propname;
+ cb.cb_received = received;
+
if (flags & ZFS_ITER_RECURSE) {
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
- NULL, NULL, 0, inherit_recurse_cb, propname);
+ NULL, NULL, 0, inherit_recurse_cb, &cb);
} else {
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
- NULL, NULL, 0, inherit_cb, propname);
+ NULL, NULL, 0, inherit_cb, &cb);
}
return (ret);
@@ -1499,31 +1676,25 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
{
upgrade_cbdata_t *cb = data;
int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
- int i;
- static struct { int zplver; int spaver; } table[] = {
- {ZPL_VERSION_FUID, SPA_VERSION_FUID},
- {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
- {0, 0}
- };
+ int needed_spa_version;
+ int spa_version;
+ if (zfs_spa_version(zhp, &spa_version) < 0)
+ return (-1);
- for (i = 0; table[i].zplver; i++) {
- if (cb->cb_version >= table[i].zplver) {
- int spa_version;
+ needed_spa_version = zfs_spa_version_map(cb->cb_version);
- if (zfs_spa_version(zhp, &spa_version) < 0)
- return (-1);
+ if (needed_spa_version < 0)
+ return (-1);
- if (spa_version < table[i].spaver) {
- /* can't upgrade */
- (void) printf(gettext("%s: can not be "
- "upgraded; the pool version needs to first "
- "be upgraded\nto version %d\n\n"),
- zfs_get_name(zhp), table[i].spaver);
- cb->cb_numfailed++;
- return (0);
- }
- }
+ if (spa_version < needed_spa_version) {
+ /* can't upgrade */
+ (void) printf(gettext("%s: can not be "
+ "upgraded; the pool version needs to first "
+ "be upgraded\nto version %d\n\n"),
+ zfs_get_name(zhp), needed_spa_version);
+ cb->cb_numfailed++;
+ return (0);
}
/* upgrade */
@@ -1622,14 +1793,13 @@ zfs_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 1 Initial ZFS filesystem version\n"));
(void) printf(gettext(" 2 Enhanced directory entries\n"));
(void) printf(gettext(" 3 Case insensitive and File system "
- "unique identifer (FUID)\n"));
+ "unique identifier (FUID)\n"));
(void) printf(gettext(" 4 userquota, groupquota "
"properties\n"));
+ (void) printf(gettext(" 5 System attributes\n"));
(void) printf(gettext("\nFor more information on a particular "
- "version, including supported releases, see:\n\n"));
- (void) printf("http://www.opensolaris.org/os/community/zfs/"
- "version/zpl/N\n\n");
- (void) printf(gettext("Where 'N' is the version number.\n"));
+ "version, including supported releases,\n"));
+ (void) printf("see the ZFS Administration Guide.\n\n");
ret = 0;
} else if (argc || all) {
/* Upgrade filesystems */
@@ -1672,82 +1842,730 @@ zfs_do_upgrade(int argc, char **argv)
return (ret);
}
+/*
+ * Identity-type bits for "zfs userspace"/"zfs groupspace" rows.  A row type
+ * combines one namespace bit (PSX or SMB) with one kind bit (USR or GRP).
+ */
+#define USTYPE_USR_BIT (0)
+#define USTYPE_GRP_BIT (1)
+#define USTYPE_PSX_BIT (2)
+#define USTYPE_SMB_BIT (3)
+
+/* kind of identity */
+#define USTYPE_USR (1 << USTYPE_USR_BIT)
+#define USTYPE_GRP (1 << USTYPE_GRP_BIT)
+
+/* namespace the identity comes from */
+#define USTYPE_PSX (1 << USTYPE_PSX_BIT)
+#define USTYPE_SMB (1 << USTYPE_SMB_BIT)
+
+/* the four concrete row types, plus their union */
+#define USTYPE_PSX_USR (USTYPE_PSX | USTYPE_USR)
+#define USTYPE_SMB_USR (USTYPE_SMB | USTYPE_USR)
+#define USTYPE_PSX_GRP (USTYPE_PSX | USTYPE_GRP)
+#define USTYPE_SMB_GRP (USTYPE_SMB | USTYPE_GRP)
+#define USTYPE_ALL (USTYPE_PSX_USR | USTYPE_SMB_USR \
+ | USTYPE_PSX_GRP | USTYPE_SMB_GRP)
+
+
+/* which per-identity values to collect: space used and/or quota */
+#define USPROP_USED_BIT (0)
+#define USPROP_QUOTA_BIT (1)
+
+#define USPROP_USED (1 << USPROP_USED_BIT)
+#define USPROP_QUOTA (1 << USPROP_QUOTA_BIT)
+
+/* One output row of "zfs userspace"; created in userspace_cb(). */
+typedef struct us_node {
+ nvlist_t *usn_nvl; /* "type", "name" and "used"/"quota" pairs */
+ uu_avl_node_t usn_avlnode; /* linkage in the sorted AVL tree */
+ uu_list_node_t usn_listnode; /* linkage in the temporary re-sort list */
+} us_node_t;
+
+/* State handed to userspace_cb() through zfs_userspace(). */
+typedef struct us_cbdata {
+ nvlist_t **cb_nvlp; /* NOTE(review): not referenced in the code visible here */
+ uu_avl_pool_t *cb_avl_pool; /* pool used to initialize new nodes */
+ uu_avl_t *cb_avl; /* tree that collects the rows */
+ boolean_t cb_numname; /* store the numeric id as the row name */
+ boolean_t cb_nicenum; /* measure value widths in zfs_nicenum() form */
+ boolean_t cb_sid2posix; /* set from -i; not read in the code visible here */
+ zfs_userquota_prop_t cb_prop; /* property currently being iterated */
+ zfs_sort_column_t *cb_sortcol; /* sort columns used for tree lookups */
+ /* widest string seen so far for each output column */
+ size_t cb_max_typelen;
+ size_t cb_max_namelen;
+ size_t cb_max_usedlen;
+ size_t cb_max_quotalen;
+} us_cbdata_t;
+
+/* Private argument passed to us_compare() via uu_avl_find(). */
+typedef struct {
+ zfs_sort_column_t *si_sortcol; /* ordered list of sort columns */
+ boolean_t si_num_name; /* "name" pairs are uint32 ids, not strings */
+ boolean_t si_parsable; /* not set by the initializers visible here */
+} us_sort_info_t;
+
+/*
+ * AVL comparison callback for us_node_t rows.
+ *
+ * 'unused' is really a us_sort_info_t *: it supplies the list of sort
+ * columns and says whether "name" is stored as a uint32 id or as a string.
+ * Columns are compared in list order; the first one that differs decides
+ * the result, inverted when that column has sc_reverse set.  Returns 0 when
+ * the rows are equal on every sort column.
+ *
+ * Numeric columns now order ascending, the same direction strcmp() gives
+ * for string names; previously they compared the other way round, so a
+ * plain (non-reverse) sort was ascending for names but descending for
+ * type/used/quota and numeric names.
+ */
+static int
+us_compare(const void *larg, const void *rarg, void *unused)
+{
+	const us_node_t *l = larg;
+	const us_node_t *r = rarg;
+	us_sort_info_t *si = (us_sort_info_t *)unused;
+	zfs_sort_column_t *sortcol = si->si_sortcol;
+	boolean_t num_name = si->si_num_name;
+	nvlist_t *lnvl = l->usn_nvl;
+	nvlist_t *rnvl = r->usn_nvl;
+	int rc = 0;
+
+	for (; sortcol != NULL; sortcol = sortcol->sc_next) {
+		/* defaults are used when a row lacks the pair */
+		char *lvstr = "";
+		char *rvstr = "";
+		uint32_t lv32 = 0;
+		uint32_t rv32 = 0;
+		uint64_t lv64 = 0;
+		uint64_t rv64 = 0;
+		zfs_prop_t prop = sortcol->sc_prop;
+		const char *propname = NULL;
+		boolean_t reverse = sortcol->sc_reverse;
+
+		switch (prop) {
+		case ZFS_PROP_TYPE:
+			propname = "type";
+			(void) nvlist_lookup_uint32(lnvl, propname, &lv32);
+			(void) nvlist_lookup_uint32(rnvl, propname, &rv32);
+			if (lv32 != rv32)
+				rc = (lv32 < rv32) ? -1 : 1;
+			break;
+		case ZFS_PROP_NAME:
+			propname = "name";
+			if (num_name) {
+				(void) nvlist_lookup_uint32(lnvl, propname,
+				    &lv32);
+				(void) nvlist_lookup_uint32(rnvl, propname,
+				    &rv32);
+				if (lv32 != rv32)
+					rc = (lv32 < rv32) ? -1 : 1;
+			} else {
+				(void) nvlist_lookup_string(lnvl, propname,
+				    &lvstr);
+				(void) nvlist_lookup_string(rnvl, propname,
+				    &rvstr);
+				rc = strcmp(lvstr, rvstr);
+			}
+			break;
+		case ZFS_PROP_USED:
+		case ZFS_PROP_QUOTA:
+			propname = (prop == ZFS_PROP_USED) ? "used" : "quota";
+			(void) nvlist_lookup_uint64(lnvl, propname, &lv64);
+			(void) nvlist_lookup_uint64(rnvl, propname, &rv64);
+			if (lv64 != rv64)
+				rc = (lv64 < rv64) ? -1 : 1;
+			break;
+		default:
+			/* other properties do not take part in the ordering */
+			break;
+		}
+
+		if (rc != 0) {
+			if (rc < 0)
+				return (reverse ? 1 : -1);
+			return (reverse ? -1 : 1);
+		}
+	}
+
+	return (rc);
+}
+
+/*
+ * Map a USTYPE_* row type to its (untranslated) display label.
+ * Any value that is not one of the four concrete types yields "Undefined".
+ */
+static inline const char *
+us_type2str(unsigned field_type)
+{
+	if (field_type == USTYPE_PSX_USR)
+		return ("POSIX User");
+	if (field_type == USTYPE_PSX_GRP)
+		return ("POSIX Group");
+	if (field_type == USTYPE_SMB_USR)
+		return ("SMB User");
+	if (field_type == USTYPE_SMB_GRP)
+		return ("SMB Group");
+
+	return ("Undefined");
+}
+
/*
* zfs userspace
*/
+/*
+ * zfs_userspace() callback: record one (domain, rid, space) result for the
+ * property in cb->cb_prop.
+ *
+ * The row is keyed by its "type" and "name" pairs through the AVL tree in
+ * cb->cb_avl; when a matching row already exists the new value is added to
+ * it, so "used" and "quota" for one identity end up in the same nvlist.
+ * The cb_max_*len fields are grown to fit the strings seen here.
+ * Returns 0 on success and -1 on failure.
+ */
static int
userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
{
- zfs_userquota_prop_t *typep = arg;
- zfs_userquota_prop_t p = *typep;
+ us_cbdata_t *cb = (us_cbdata_t *)arg;
+ zfs_userquota_prop_t prop = cb->cb_prop;
char *name = NULL;
- char *ug, *propname;
+ char *propname;
char namebuf[32];
char sizebuf[32];
+ /* function scope: "name" may still point here after the SMB branch */
+ char sid[ZFS_MAXNAMELEN+32];
+ us_node_t *node;
+ uu_avl_pool_t *avl_pool = cb->cb_avl_pool;
+ uu_avl_t *avl = cb->cb_avl;
+ uu_avl_index_t idx;
+ nvlist_t *props;
+ us_node_t *n;
+ zfs_sort_column_t *sortcol = cb->cb_sortcol;
+ unsigned type;
+ const char *typestr;
+ size_t namelen;
+ size_t typelen;
+ size_t sizelen;
+ us_sort_info_t sortinfo = { sortcol, cb->cb_numname };
if (domain == NULL || domain[0] == '\0') {
- if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) {
+ /* POSIX */
+ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) {
+ type = USTYPE_PSX_GRP;
struct group *g = getgrgid(rid);
if (g)
name = g->gr_name;
} else {
+ type = USTYPE_PSX_USR;
struct passwd *p = getpwuid(rid);
if (p)
name = p->pw_name;
}
+ } else {
+ uid_t id;
+ uint64_t classes;
+#ifdef sun
+ int err;
+ directory_error_t e;
+#endif
+
+ (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid);
+ /* SMB */
+ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) {
+ type = USTYPE_SMB_GRP;
+#ifdef sun
+ err = sid_to_id(sid, B_FALSE, &id);
+#endif
+ } else {
+ type = USTYPE_SMB_USR;
+#ifdef sun
+ err = sid_to_id(sid, B_TRUE, &id);
+#endif
+ }
+
+#ifdef sun
+ if (err == 0) {
+ rid = id;
+
+ e = directory_name_from_sid(NULL, sid, &name, &classes);
+ if (e != NULL) {
+ directory_error_free(e);
+ /* this function returns int, not a pointer */
+ return (-1);
+ }
+
+ if (name == NULL)
+ name = sid;
+ }
+#endif
}
- if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA)
- ug = "group";
- else
- ug = "user";
- if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED)
+ if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED)
propname = "used";
else
propname = "quota";
- if (name == NULL) {
- (void) snprintf(namebuf, sizeof (namebuf),
- "%llu", (longlong_t)rid);
+ /* namebuf is always filled: it sizes the column when cb_numname is set */
+ (void) snprintf(namebuf, sizeof (namebuf), "%u", rid);
+ if (name == NULL)
name = namebuf;
+
+ if (cb->cb_nicenum)
+ zfs_nicenum(space, sizebuf, sizeof (sizebuf));
+ else
+ (void) snprintf(sizebuf, sizeof (sizebuf), "%llu",
+ (unsigned long long)space);
+
+ node = safe_malloc(sizeof (us_node_t));
+ uu_avl_node_init(node, &node->usn_avlnode, avl_pool);
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+ free(node);
+ return (-1);
}
- zfs_nicenum(space, sizebuf, sizeof (sizebuf));
- (void) printf("%s %s %s%c%s %s\n", propname, ug, domain,
- domain[0] ? '-' : ' ', name, sizebuf);
+ if (nvlist_add_uint32(props, "type", type) != 0)
+ nomem();
+
+ if (cb->cb_numname) {
+ if (nvlist_add_uint32(props, "name", rid) != 0)
+ nomem();
+ namelen = strlen(namebuf);
+ } else {
+ if (nvlist_add_string(props, "name", name) != 0)
+ nomem();
+ namelen = strlen(name);
+ }
+
+ typestr = us_type2str(type);
+ typelen = strlen(gettext(typestr));
+ if (typelen > cb->cb_max_typelen)
+ cb->cb_max_typelen = typelen;
+
+ if (namelen > cb->cb_max_namelen)
+ cb->cb_max_namelen = namelen;
+
+ sizelen = strlen(sizebuf);
+ if (0 == strcmp(propname, "used")) {
+ if (sizelen > cb->cb_max_usedlen)
+ cb->cb_max_usedlen = sizelen;
+ } else {
+ if (sizelen > cb->cb_max_quotalen)
+ cb->cb_max_quotalen = sizelen;
+ }
+
+ node->usn_nvl = props;
+
+ /* reuse the existing row for this identity, if there is one */
+ n = uu_avl_find(avl, node, &sortinfo, &idx);
+ if (n == NULL)
+ uu_avl_insert(avl, node, idx);
+ else {
+ nvlist_free(props);
+ free(node);
+ node = n;
+ props = node->usn_nvl;
+ }
+
+ if (nvlist_add_uint64(props, propname, space) != 0)
+ nomem();
return (0);
}
+/*
+ * Decide whether userquota property 'p' should be queried, given the
+ * requested USTYPE_* bits in 'types' and USPROP_* bits in 'props'.
+ * Properties other than the four user/group used/quota ones always pass.
+ */
+static inline boolean_t
+usprop_check(zfs_userquota_prop_t p, unsigned types, unsigned props)
+{
+	unsigned kind_bit;
+	unsigned value_bit;
+
+	if (p == ZFS_PROP_USERUSED) {
+		kind_bit = USTYPE_USR;
+		value_bit = USPROP_USED;
+	} else if (p == ZFS_PROP_USERQUOTA) {
+		kind_bit = USTYPE_USR;
+		value_bit = USPROP_QUOTA;
+	} else if (p == ZFS_PROP_GROUPUSED) {
+		kind_bit = USTYPE_GRP;
+		value_bit = USPROP_USED;
+	} else if (p == ZFS_PROP_GROUPQUOTA) {
+		kind_bit = USTYPE_GRP;
+		value_bit = USPROP_QUOTA;
+	} else {
+		/* ALL */
+		return (B_TRUE);
+	}
+
+	if ((kind_bit & types) == 0)
+		return (B_FALSE);
+
+	return ((value_bit & props) != 0);
+}
+
+/* Output-column selection bits for the -o option of "zfs userspace". */
+#define USFIELD_TYPE (1 << 0)
+#define USFIELD_NAME (1 << 1)
+#define USFIELD_USED (1 << 2)
+#define USFIELD_QUOTA (1 << 3)
+#define USFIELD_ALL (USFIELD_TYPE | USFIELD_NAME | USFIELD_USED | USFIELD_QUOTA)
+
+/*
+ * Parse the comma-separated list in the global 'optarg' and OR the bit for
+ * every recognized word into *fieldsp.
+ *
+ * names/bits are parallel arrays of 'len' entries.  The list is split in
+ * place: each ',' in optarg is overwritten with '\0' and not restored.
+ * Returns 0 on success; on the first unknown word an error is printed to
+ * stderr and -1 is returned (bits from earlier words stay set).
+ */
+static int
+parsefields(unsigned *fieldsp, char **names, unsigned *bits, size_t len)
+{
+	char *field = optarg;
+	char *delim;
+
+	do {
+		size_t i;
+		boolean_t found = B_FALSE;
+
+		delim = strchr(field, ',');
+		if (delim != NULL)
+			*delim = '\0';
+
+		for (i = 0; i < len; i++) {
+			if (strcmp(field, names[i]) == 0) {
+				found = B_TRUE;
+				*fieldsp |= bits[i];
+				break;
+			}
+		}
+
+		if (!found) {
+			/* note the space that joins the two literals */
+			(void) fprintf(stderr, gettext("invalid type '%s' "
+			    "for -t option\n"), field);
+			return (-1);
+		}
+
+		/* only step past a separator that actually exists */
+		if (delim != NULL)
+			field = delim + 1;
+	} while (delim != NULL);
+
+	return (0);
+}
+
+
+/*
+ * Words accepted by the -t option and, at the same index, the USTYPE_* bits
+ * each one selects.  Passed to parsefields() as parallel arrays.
+ */
+static char *type_names[] = { "posixuser", "smbuser", "posixgroup", "smbgroup",
+ "all" };
+static unsigned type_bits[] = {
+ USTYPE_PSX_USR,
+ USTYPE_SMB_USR,
+ USTYPE_PSX_GRP,
+ USTYPE_SMB_GRP,
+ USTYPE_ALL
+};
+
+/*
+ * Column names accepted by -o (also the nvlist pair names of a row) and, at
+ * the same index, their USFIELD_* bits.  print_us_node() and print_us()
+ * walk these with a hard-coded count of 4.
+ */
+static char *us_field_names[] = { "type", "name", "used", "quota" };
+static unsigned us_field_bits[] = {
+ USFIELD_TYPE,
+ USFIELD_NAME,
+ USFIELD_USED,
+ USFIELD_QUOTA
+};
+
+/*
+ * Print one row of "zfs userspace" output, followed by a newline.
+ *
+ * Pairs are printed in the order they appear in the row's nvlist; pairs
+ * whose column is not selected in 'fields' are skipped.  If the quota
+ * column is selected and the row has no "quota" pair, a string pair "none"
+ * is added to the nvlist first.  Columns are separated by a tab when
+ * 'scripted' is set and by a space otherwise.  'parseable' prints used and
+ * quota as plain integers instead of zfs_nicenum() form.
+ *
+ * TYPE and NAME are left-aligned and USED/QUOTA right-aligned, matching the
+ * header that print_us() emits.  A numeric name is stored as uint32 by
+ * userspace_cb(), so that is the type formatted here (uint64 is still
+ * accepted).
+ */
+static void
+print_us_node(boolean_t scripted, boolean_t parseable, unsigned fields,
+    size_t type_width, size_t name_width, size_t used_width,
+    size_t quota_width, us_node_t *node)
+{
+	nvlist_t *nvl = node->usn_nvl;
+	nvpair_t *nvp = NULL;
+	char valstr[ZFS_MAXNAMELEN];
+	boolean_t first = B_TRUE;
+
+	if ((fields & USFIELD_QUOTA) && !nvlist_exists(nvl, "quota")) {
+		if (nvlist_add_string(nvl, "quota", "none") != 0)
+			nomem();
+	}
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		char *pname = nvpair_name(nvp);
+		data_type_t type = nvpair_type(nvp);
+		uint32_t val32 = 0;
+		uint64_t val64 = 0;
+		char *strval = NULL;
+		unsigned field = 0;
+		size_t width = 0;
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			if (0 == strcmp(pname, us_field_names[i])) {
+				field = us_field_bits[i];
+				break;
+			}
+		}
+
+		if (!(field & fields))
+			continue;
+
+		switch (type) {
+		case DATA_TYPE_UINT32:
+			(void) nvpair_value_uint32(nvp, &val32);
+			break;
+		case DATA_TYPE_UINT64:
+			(void) nvpair_value_uint64(nvp, &val64);
+			break;
+		case DATA_TYPE_STRING:
+			(void) nvpair_value_string(nvp, &strval);
+			break;
+		default:
+			(void) fprintf(stderr, "Invalid data type\n");
+		}
+
+		if (!first) {
+			if (scripted)
+				(void) printf("\t");
+			else
+				(void) printf(" ");
+		}
+
+		switch (field) {
+		case USFIELD_TYPE:
+			strval = (char *)us_type2str(val32);
+			width = type_width;
+			break;
+		case USFIELD_NAME:
+			if (type == DATA_TYPE_UINT32) {
+				(void) snprintf(valstr, sizeof (valstr), "%u",
+				    val32);
+				strval = valstr;
+			} else if (type == DATA_TYPE_UINT64) {
+				(void) snprintf(valstr, sizeof (valstr), "%llu",
+				    (unsigned long long)val64);
+				strval = valstr;
+			}
+			width = name_width;
+			break;
+		case USFIELD_USED:
+		case USFIELD_QUOTA:
+			if (type == DATA_TYPE_UINT64) {
+				if (parseable)
+					(void) snprintf(valstr,
+					    sizeof (valstr), "%llu",
+					    (unsigned long long)val64);
+				else
+					zfs_nicenum(val64, valstr,
+					    sizeof (valstr));
+				strval = valstr;
+			}
+			width = (field == USFIELD_USED) ?
+			    used_width : quota_width;
+			break;
+		}
+
+		/* never hand a NULL pointer to %s */
+		if (strval == NULL)
+			strval = "-";
+
+		/* the '*' width argument must be an int */
+		if (field == USFIELD_TYPE || field == USFIELD_NAME)
+			(void) printf("%-*s", (int)width, strval);
+		else
+			(void) printf("%*s", (int)width, strval);
+
+		first = B_FALSE;
+	}
+
+	(void) printf("\n");
+}
+
+/*
+ * Print the "zfs userspace" table: an optional header line (suppressed when
+ * 'scripted' is set) followed by one line per node of 'avl', in tree order.
+ *
+ * 'fields' selects the columns and the *_width arguments give the column
+ * widths.  When 'rmnode' is set, each node's nvlist is freed after its row
+ * has been printed; the nodes themselves stay in the tree.
+ */
+static void
+print_us(boolean_t scripted, boolean_t parsable, unsigned fields,
+    unsigned type_width, unsigned name_width, unsigned used_width,
+    unsigned quota_width, boolean_t rmnode, uu_avl_t *avl)
+{
+	static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" };
+	us_node_t *node;
+	const char *col;
+	int i;
+	/* kept as int: that is the type printf() expects for a '*' width */
+	int width[4] = { (int)type_width, (int)name_width, (int)used_width,
+	    (int)quota_width };
+
+	if (!scripted) {
+		boolean_t first = B_TRUE;
+
+		for (i = 0; i < 4; i++) {
+			unsigned field = us_field_bits[i];
+
+			if (!(field & fields))
+				continue;
+
+			col = gettext(us_field_hdr[i]);
+			if (field == USFIELD_TYPE || field == USFIELD_NAME)
+				(void) printf(first?"%-*s":" %-*s", width[i],
+				    col);
+			else
+				(void) printf(first?"%*s":" %*s", width[i],
+				    col);
+			first = B_FALSE;
+		}
+		(void) printf("\n");
+	}
+
+	for (node = uu_avl_first(avl); node != NULL;
+	    node = uu_avl_next(avl, node)) {
+		/* the quota column gets its own width, not the used width */
+		print_us_node(scripted, parsable, fields, type_width,
+		    name_width, used_width, quota_width, node);
+		if (rmnode)
+			nvlist_free(node->usn_nvl);
+	}
+}
+
+/*
+ * zfs userspace / zfs groupspace
+ *
+ * Options, as parsed below:
+ * -n store and print numeric ids instead of names
+ * -H scripted mode: no header, tab-separated columns
+ * -p print exact (parseable) numbers
+ * -o comma-separated list of columns to display
+ * -s / -S add a sort column (-S in reverse order)
+ * -t comma-separated list of identity types
+ * -i sets the sid2posix flag
+ *
+ * Rows are gathered into an AVL tree by userspace_cb(), optionally
+ * re-sorted, printed by print_us() and then released.
+ */
static int
zfs_do_userspace(int argc, char **argv)
{
zfs_handle_t *zhp;
zfs_userquota_prop_t p;
+
+ uu_avl_pool_t *avl_pool;
+ uu_avl_t *avl_tree;
+ uu_avl_walk_t *walk;
+
+ char *cmd;
+ boolean_t scripted = B_FALSE;
+ boolean_t prtnum = B_FALSE;
+ boolean_t parseable = B_FALSE;
+ boolean_t sid2posix = B_FALSE;
int error;
+ int c;
+ zfs_sort_column_t *default_sortcol = NULL;
+ zfs_sort_column_t *sortcol = NULL;
+ unsigned types = USTYPE_PSX_USR | USTYPE_SMB_USR;
+ unsigned fields = 0;
+ unsigned props = USPROP_USED | USPROP_QUOTA;
+ us_cbdata_t cb;
+ us_node_t *node;
+ boolean_t resort_avl = B_FALSE;
+
+ if (argc < 2)
+ usage(B_FALSE);
- /*
- * Try the python version. If the execv fails, we'll continue
- * and do a simplistic implementation.
- */
- (void) execv(pypath, argv-1);
+ cmd = argv[0];
+ if (0 == strcmp(cmd, "groupspace"))
+ /* toggle default group types */
+ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP;
+
+ /* check options */
+ while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) {
+ switch (c) {
+ case 'n':
+ prtnum = B_TRUE;
+ break;
+ case 'H':
+ scripted = B_TRUE;
+ break;
+ case 'p':
+ parseable = B_TRUE;
+ break;
+ case 'o':
+ if (parsefields(&fields, us_field_names, us_field_bits,
+ 4) != 0)
+ return (1);
+ break;
+ case 's':
+ if (zfs_add_sort_column(&sortcol, optarg,
+ B_FALSE) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid property '%s'\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
+ case 'S':
+ if (zfs_add_sort_column(&sortcol, optarg,
+ B_TRUE) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid property '%s'\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
+ case 't':
+ if (parsefields(&types, type_names, type_bits, 5))
+ return (1);
+ break;
+ case 'i':
+ sid2posix = B_TRUE;
+ break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
- (void) fprintf(stderr, "internal error: %s not found\n"
- "falling back on built-in implementation, "
- "some features will not work\n", pypath);
- (void) fprintf(stderr, " install sysutils/py-zfs port to correct this\n");
+ /* argv[argc-1] below needs at least one operand left */
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing dataset name\n"));
+ usage(B_FALSE);
+ }
+
+ /*
+ * Rows are first collected sorted by the default columns (type, name).
+ * Sorting by quota needs a second pass, because a row's quota may only
+ * be added after the row has been placed in the tree.
+ */
+ if (sortcol) {
+ zfs_sort_column_t *sc;
+ for (sc = sortcol; sc; sc = sc->sc_next) {
+ if (sc->sc_prop == ZFS_PROP_QUOTA) {
+ resort_avl = B_TRUE;
+ break;
+ }
+ }
+ }
+
+ if (!fields)
+ fields = USFIELD_ALL;
if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL)
return (1);
- (void) printf("PROP TYPE NAME VALUE\n");
+ if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t),
+ offsetof(us_node_t, usn_avlnode),
+ us_compare, UU_DEFAULT)) == NULL)
+ nomem();
+ if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
+ nomem();
+
+ if (sortcol && !resort_avl)
+ cb.cb_sortcol = sortcol;
+ else {
+ (void) zfs_add_sort_column(&default_sortcol, "type", B_FALSE);
+ (void) zfs_add_sort_column(&default_sortcol, "name", B_FALSE);
+ cb.cb_sortcol = default_sortcol;
+ }
+ cb.cb_numname = prtnum;
+ cb.cb_nicenum = !parseable;
+ cb.cb_avl_pool = avl_pool;
+ cb.cb_avl = avl_tree;
+ cb.cb_sid2posix = sid2posix;
+ cb.cb_max_typelen = strlen(gettext("TYPE"));
+ cb.cb_max_namelen = strlen(gettext("NAME"));
+ cb.cb_max_usedlen = strlen(gettext("USED"));
+ cb.cb_max_quotalen = strlen(gettext("QUOTA"));
for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
- error = zfs_userspace(zhp, p, userspace_cb, &p);
+ if (!usprop_check(p, types, props))
+ continue;
+
+ cb.cb_prop = p;
+ error = zfs_userspace(zhp, p, userspace_cb, &cb);
+
if (error)
break;
}
+
+ if (resort_avl) {
+ us_node_t *node;
+ us_node_t *rmnode;
+ uu_list_pool_t *listpool;
+ uu_list_t *list;
+ uu_avl_index_t idx = 0;
+ uu_list_index_t idx2 = 0;
+ listpool = uu_list_pool_create("tmplist", sizeof (us_node_t),
+ offsetof(us_node_t, usn_listnode), NULL,
+ UU_DEFAULT);
+ if (listpool == NULL)
+ nomem();
+ list = uu_list_create(listpool, NULL, UU_DEFAULT);
+ if (list == NULL)
+ nomem();
+
+ /* move every row from the tree onto the temporary list */
+ node = uu_avl_first(avl_tree);
+ while (node != NULL) {
+ rmnode = node;
+ node = uu_avl_next(avl_tree, node);
+ uu_avl_remove(avl_tree, rmnode);
+ /*
+ * Each row needs its list linkage initialized, and only
+ * once we know the row exists (the tree may be empty).
+ */
+ uu_list_node_init(rmnode, &rmnode->usn_listnode,
+ listpool);
+ if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) {
+ uu_list_insert(list, rmnode, idx2);
+ }
+ }
+
+ /* re-insert the rows using the user's sort columns */
+ for (node = uu_list_first(list); node != NULL;
+ node = uu_list_next(list, node)) {
+ us_sort_info_t sortinfo = { sortcol, cb.cb_numname };
+ if (uu_avl_find(avl_tree, node, &sortinfo, &idx) ==
+ NULL)
+ uu_avl_insert(avl_tree, node, idx);
+ }
+
+ uu_list_destroy(list);
+ }
+
+ /* print & free node`s nvlist memory */
+ print_us(scripted, parseable, fields, cb.cb_max_typelen,
+ cb.cb_max_namelen, cb.cb_max_usedlen,
+ cb.cb_max_quotalen, B_TRUE, cb.cb_avl);
+
+ if (sortcol)
+ zfs_free_sort_columns(sortcol);
+ zfs_free_sort_columns(default_sortcol);
+
+ /*
+ * Finally, clean up the AVL tree.
+ */
+ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
+ nomem();
+
+ while ((node = uu_avl_walk_next(walk)) != NULL) {
+ uu_avl_remove(cb.cb_avl, node);
+ free(node);
+ }
+
+ uu_avl_walk_end(walk);
+ uu_avl_destroy(avl_tree);
+ uu_avl_pool_destroy(avl_pool);
+
return (error);
}
@@ -1756,11 +2574,11 @@ zfs_do_userspace(int argc, char **argv)
* [-s property [-s property]...] [-S property [-S property]...]
* <dataset> ...
*
- * -r Recurse over all children
- * -d Limit recursion by depth.
- * -H Scripted mode; elide headers and separate columns by tabs
- * -o Control which fields to display.
- * -t Control which object types to display.
+ * -r Recurse over all children
+ * -d Limit recursion by depth.
+ * -H Scripted mode; elide headers and separate columns by tabs
+ * -o Control which fields to display.
+ * -t Control which object types to display.
* -s Specify sort columns, descending order.
* -S Specify sort columns, ascending order.
*
@@ -2157,9 +2975,9 @@ zfs_do_promote(int argc, char **argv)
/*
* zfs rollback [-rRf] <snapshot>
*
- * -r Delete any intervening snapshots before doing rollback
- * -R Delete any snapshots and their clones
- * -f ignored for backwards compatability
+ * -r Delete any intervening snapshots before doing rollback
+ * -R Delete any snapshots and their clones
+ * -f ignored for backwards compatibility
*
* Given a filesystem, rollback to a specific snapshot, discarding any changes
* since then and making it the active dataset. If more recent snapshots exist,
@@ -2420,11 +3238,8 @@ zfs_do_snapshot(int argc, char **argv)
char c;
nvlist_t *props;
- if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- return (1);
- }
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
while ((c = getopt(argc, argv, "ro:")) != -1) {
@@ -2469,8 +3284,8 @@ usage:
}
/*
- * zfs send [-v] -R [-i|-I <@snap>] <fs@snap>
- * zfs send [-v] [-i|-I <@snap>] <fs@snap>
+ * zfs send [-vDp] -R [-i|-I <@snap>] <fs@snap>
+ * zfs send [-vDp] [-i|-I <@snap>] <fs@snap>
*
* Send a backup stream to stdout.
*/
@@ -2481,14 +3296,13 @@ zfs_do_send(int argc, char **argv)
char *toname = NULL;
char *cp;
zfs_handle_t *zhp;
- boolean_t doall = B_FALSE;
- boolean_t replicate = B_FALSE;
- boolean_t fromorigin = B_FALSE;
- boolean_t verbose = B_FALSE;
+ sendflags_t flags = { 0 };
int c, err;
+ nvlist_t *dbgnv;
+ boolean_t extraverbose = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, ":i:I:Rv")) != -1) {
+ while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) {
switch (c) {
case 'i':
if (fromname)
@@ -2499,13 +3313,21 @@ zfs_do_send(int argc, char **argv)
if (fromname)
usage(B_FALSE);
fromname = optarg;
- doall = B_TRUE;
+ flags.doall = B_TRUE;
break;
case 'R':
- replicate = B_TRUE;
+ flags.replicate = B_TRUE;
+ break;
+ case 'p':
+ flags.props = B_TRUE;
break;
case 'v':
- verbose = B_TRUE;
+ if (flags.verbose)
+ extraverbose = B_TRUE;
+ flags.verbose = B_TRUE;
+ break;
+ case 'D':
+ flags.dedup = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -2565,7 +3387,7 @@ zfs_do_send(int argc, char **argv)
if (strcmp(origin, fromname) == 0) {
fromname = NULL;
- fromorigin = B_TRUE;
+ flags.fromorigin = B_TRUE;
} else {
*cp = '\0';
if (cp != fromname && strcmp(argv[0], fromname)) {
@@ -2583,18 +3405,29 @@ zfs_do_send(int argc, char **argv)
}
}
- if (replicate && fromname == NULL)
- doall = B_TRUE;
+ if (flags.replicate && fromname == NULL)
+ flags.doall = B_TRUE;
+
+ err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0,
+ extraverbose ? &dbgnv : NULL);
- err = zfs_send(zhp, fromname, toname, replicate, doall, fromorigin,
- verbose, STDOUT_FILENO);
+ if (extraverbose) {
+ /*
+ * dump_nvlist prints to stdout, but that's been
+ * redirected to a file. Make it print to stderr
+ * instead.
+ */
+ (void) dup2(STDERR_FILENO, STDOUT_FILENO);
+ dump_nvlist(dbgnv, 0);
+ nvlist_free(dbgnv);
+ }
zfs_close(zhp);
return (err != 0);
}
/*
- * zfs receive [-dnvF] <fs@snap>
+ * zfs receive [-vnFu] [-d | -e] <fs@snap>
*
* Restore a backup stream from stdin.
*/
@@ -2602,15 +3435,18 @@ static int
zfs_do_receive(int argc, char **argv)
{
int c, err;
- recvflags_t flags;
+ recvflags_t flags = { 0 };
- bzero(&flags, sizeof (recvflags_t));
/* check options */
- while ((c = getopt(argc, argv, ":dnuvF")) != -1) {
+ while ((c = getopt(argc, argv, ":denuvF")) != -1) {
switch (c) {
case 'd':
flags.isprefix = B_TRUE;
break;
+ case 'e':
+ flags.isprefix = B_TRUE;
+ flags.istail = B_TRUE;
+ break;
case 'n':
flags.dryrun = B_TRUE;
break;
@@ -2661,13 +3497,1652 @@ zfs_do_receive(int argc, char **argv)
return (err != 0);
}
-typedef struct get_all_cbdata {
- zfs_handle_t **cb_handles;
- size_t cb_alloc;
- size_t cb_used;
- uint_t cb_types;
- boolean_t cb_verbose;
-} get_all_cbdata_t;
+/*
+ * allow/unallow stuff
+ */
+/*
+ * Permission names used by "zfs allow"/"zfs unallow".
+ * copied from zfs/sys/dsl_deleg.h
+ */
+#define ZFS_DELEG_PERM_CREATE "create"
+#define ZFS_DELEG_PERM_DESTROY "destroy"
+#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
+#define ZFS_DELEG_PERM_ROLLBACK "rollback"
+#define ZFS_DELEG_PERM_CLONE "clone"
+#define ZFS_DELEG_PERM_PROMOTE "promote"
+#define ZFS_DELEG_PERM_RENAME "rename"
+#define ZFS_DELEG_PERM_MOUNT "mount"
+#define ZFS_DELEG_PERM_SHARE "share"
+#define ZFS_DELEG_PERM_SEND "send"
+#define ZFS_DELEG_PERM_RECEIVE "receive"
+#define ZFS_DELEG_PERM_ALLOW "allow"
+#define ZFS_DELEG_PERM_USERPROP "userprop"
+#define ZFS_DELEG_PERM_VSCAN "vscan" /* NOTE(review): has no entry in zfs_deleg_perm_tbl */
+#define ZFS_DELEG_PERM_USERQUOTA "userquota"
+#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
+#define ZFS_DELEG_PERM_USERUSED "userused"
+#define ZFS_DELEG_PERM_GROUPUSED "groupused"
+#define ZFS_DELEG_PERM_HOLD "hold"
+#define ZFS_DELEG_PERM_RELEASE "release"
+#define ZFS_DELEG_PERM_DIFF "diff"
+
+/* alias for the terminating note value; presumably the note count -- confirm */
+#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE
+
+/*
+ * Maps each permission name to its zfs_deleg_note_t value.  The table ends
+ * with a { NULL, ZFS_DELEG_NOTE_NONE } entry.  The first group is listed
+ * alphabetically; the second group holds the notes that deleg_perm_type()
+ * reports as "other" rather than "subcommand".
+ */
+static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
+ { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW },
+ { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE },
+ { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE },
+ { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY },
+ { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF},
+ { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
+ { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT },
+ { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE },
+ { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE },
+ { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
+ { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
+ { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
+ { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
+ { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
+ { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
+
+ { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+ { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
+ { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+ { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+ { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+ { NULL, ZFS_DELEG_NOTE_NONE }
+};
+
+/* permission structure: one named permission held by one "who" entry */
+typedef struct deleg_perm {
+ zfs_deleg_who_type_t dp_who_type; /* who-type of the owning entry */
+ const char *dp_name; /* permission name; not copied */
+ boolean_t dp_local; /* seen with ZFS_DELEG_LOCAL locality */
+ boolean_t dp_descend; /* seen with ZFS_DELEG_DESCENDENT locality */
+} deleg_perm_t;
+
+/* AVL tree element wrapping a deleg_perm_t; ordered by deleg_perm_compare() */
+typedef struct deleg_perm_node {
+ deleg_perm_t dpn_perm;
+
+ uu_avl_node_t dpn_avl_node; /* linkage in who_deleg_perm_avl */
+} deleg_perm_node_t;
+
+typedef struct fs_perm fs_perm_t;
+
+/* permissions set: everything delegated to one "who" on one filesystem */
+typedef struct who_perm {
+ zfs_deleg_who_type_t who_type;
+ const char *who_name; /* id */
+ char who_ug_name[256]; /* user/group name */
+ fs_perm_t *who_fsperm; /* uplink */
+
+ uu_avl_t *who_deleg_perm_avl; /* permissions */
+} who_perm_t;
+
+/* AVL tree element wrapping a who_perm_t; ordered by who_perm_compare() */
+typedef struct who_perm_node {
+ who_perm_t who_perm;
+ uu_avl_node_t who_avl_node; /* linkage in fsp_sc_avl or fsp_uge_avl */
+} who_perm_node_t;
+
+typedef struct fs_perm_set fs_perm_set_t;
+/* fs permissions: all "who" entries recorded for one filesystem */
+struct fs_perm {
+ const char *fsp_name; /* filesystem name; not copied */
+
+ uu_avl_t *fsp_sc_avl; /* sets,create */
+ uu_avl_t *fsp_uge_avl; /* user,group,everyone */
+
+ fs_perm_set_t *fsp_set; /* uplink */
+};
+
+/* list element wrapping a fs_perm_t; lives on fs_perm_set.fsps_list */
+typedef struct fs_perm_node {
+ fs_perm_t fspn_fsperm;
+ uu_avl_t *fspn_avl; /* NOTE(review): not referenced in the code visible here */
+
+ uu_list_node_t fspn_list_node; /* linkage in fsps_list */
+} fs_perm_node_t;
+
+/* top level structure: the filesystem list plus the pools its trees use */
+struct fs_perm_set {
+ uu_list_pool_t *fsps_list_pool;
+ uu_list_t *fsps_list; /* list of fs_perms */
+
+ uu_avl_pool_t *fsps_named_set_avl_pool; /* backs fs_perm.fsp_sc_avl */
+ uu_avl_pool_t *fsps_who_perm_avl_pool; /* backs fs_perm.fsp_uge_avl */
+ uu_avl_pool_t *fsps_deleg_perm_avl_pool; /* backs who_deleg_perm_avl */
+};
+
+/*
+ * Classify a delegation note for display: the quota/used/userprop notes
+ * are "other", every remaining note is a "subcommand".  The returned
+ * string is passed through gettext().
+ */
+static inline const char *
+deleg_perm_type(zfs_deleg_note_t note)
+{
+	if (note == ZFS_DELEG_NOTE_GROUPQUOTA ||
+	    note == ZFS_DELEG_NOTE_GROUPUSED ||
+	    note == ZFS_DELEG_NOTE_USERPROP ||
+	    note == ZFS_DELEG_NOTE_USERQUOTA ||
+	    note == ZFS_DELEG_NOTE_USERUSED)
+		return (gettext("other"));
+
+	return (gettext("subcommand"));
+}
+
+/*
+ * Sort rank of a delegation "who" type: named sets (0), create-time (1),
+ * users (2), groups (3), everyone (4).  The *_SETS variants rank with
+ * their base type; anything else ranks -1.
+ */
+static int inline
+who_type2weight(zfs_deleg_who_type_t who_type)
+{
+	switch (who_type) {
+	case ZFS_DELEG_NAMED_SET_SETS:
+	case ZFS_DELEG_NAMED_SET:
+		return (0);
+	case ZFS_DELEG_CREATE_SETS:
+	case ZFS_DELEG_CREATE:
+		return (1);
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_USER:
+		return (2);
+	case ZFS_DELEG_GROUP_SETS:
+	case ZFS_DELEG_GROUP:
+		return (3);
+	case ZFS_DELEG_EVERYONE_SETS:
+	case ZFS_DELEG_EVERYONE:
+		return (4);
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * AVL comparison for who_perm_node_t: order by who-type weight first, then
+ * by who_name (at most ZFS_MAX_DELEG_NAME-1 characters are compared).
+ * Returns -1, 0 or 1.
+ */
+/* ARGSUSED */
+static int
+who_perm_compare(const void *larg, const void *rarg, void *unused)
+{
+	const who_perm_node_t *lhs = larg;
+	const who_perm_node_t *rhs = rarg;
+	int diff;
+
+	diff = who_type2weight(lhs->who_perm.who_type) -
+	    who_type2weight(rhs->who_perm.who_type);
+	if (diff == 0) {
+		diff = strncmp(lhs->who_perm.who_name, rhs->who_perm.who_name,
+		    ZFS_MAX_DELEG_NAME-1);
+	}
+
+	/* collapse to the -1/0/1 the AVL code expects */
+	return ((diff > 0) - (diff < 0));
+}
+
+/*
+ * AVL comparison for deleg_perm_node_t: order by permission name (at most
+ * ZFS_MAX_DELEG_NAME-1 characters are compared).  Returns -1, 0 or 1.
+ */
+/* ARGSUSED */
+static int
+deleg_perm_compare(const void *larg, const void *rarg, void *unused)
+{
+	const deleg_perm_node_t *lhs = larg;
+	const deleg_perm_node_t *rhs = rarg;
+	int diff = strncmp(lhs->dpn_perm.dp_name, rhs->dpn_perm.dp_name,
+	    ZFS_MAX_DELEG_NAME-1);
+
+	if (diff > 0)
+		return (1);
+	if (diff < 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Zero *fspset, then create its list pool, its list and the three AVL
+ * pools.  Any creation failure ends in nomem().
+ */
+static inline void
+fs_perm_set_init(fs_perm_set_t *fspset)
+{
+	bzero(fspset, sizeof (fs_perm_set_t));
+
+	fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool",
+	    sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node),
+	    NULL, UU_DEFAULT);
+	if (fspset->fsps_list_pool == NULL)
+		nomem();
+
+	fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL,
+	    UU_DEFAULT);
+	if (fspset->fsps_list == NULL)
+		nomem();
+
+	fspset->fsps_named_set_avl_pool = uu_avl_pool_create(
+	    "named_set_avl_pool", sizeof (who_perm_node_t),
+	    offsetof(who_perm_node_t, who_avl_node), who_perm_compare,
+	    UU_DEFAULT);
+	if (fspset->fsps_named_set_avl_pool == NULL)
+		nomem();
+
+	fspset->fsps_who_perm_avl_pool = uu_avl_pool_create(
+	    "who_perm_avl_pool", sizeof (who_perm_node_t),
+	    offsetof(who_perm_node_t, who_avl_node), who_perm_compare,
+	    UU_DEFAULT);
+	if (fspset->fsps_who_perm_avl_pool == NULL)
+		nomem();
+
+	fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create(
+	    "deleg_perm_avl_pool", sizeof (deleg_perm_node_t),
+	    offsetof(deleg_perm_node_t, dpn_avl_node), deleg_perm_compare,
+	    UU_DEFAULT);
+	if (fspset->fsps_deleg_perm_avl_pool == NULL)
+		nomem();
+}
+
+static inline void fs_perm_fini(fs_perm_t *);
+static inline void who_perm_fini(who_perm_t *);
+
+/*
+ * Tear down every fs_perm on the set's list, freeing each list node, then
+ * destroy the three AVL pools.  The list and its pool are not destroyed
+ * here.
+ */
+static inline void
+fs_perm_set_fini(fs_perm_set_t *fspset)
+{
+	fs_perm_node_t *cur;
+	fs_perm_node_t *following;
+
+	for (cur = uu_list_first(fspset->fsps_list); cur != NULL;
+	    cur = following) {
+		/* fetch the successor before 'cur' is unlinked and freed */
+		following = uu_list_next(fspset->fsps_list, cur);
+		fs_perm_fini(&cur->fspn_fsperm);
+		uu_list_remove(fspset->fsps_list, cur);
+		free(cur);
+	}
+
+	uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool);
+	uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool);
+	uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool);
+}
+
+/*
+ * Set the name and who-type of a permission.  The name pointer is stored,
+ * not copied; dp_local and dp_descend are left untouched.
+ */
+static inline void
+deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type,
+    const char *name)
+{
+	deleg_perm->dp_name = name;
+	deleg_perm->dp_who_type = type;
+}
+
+/*
+ * Zero *who_perm, give it an empty permission tree drawn from the owning
+ * set's deleg-perm pool, and record its type, name and parent fs_perm.
+ * The name pointer is stored, not copied.
+ */
+static inline void
+who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm,
+    zfs_deleg_who_type_t type, const char *name)
+{
+	uu_avl_pool_t *perm_pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool;
+
+	bzero(who_perm, sizeof (who_perm_t));
+
+	who_perm->who_deleg_perm_avl = uu_avl_create(perm_pool, NULL,
+	    UU_DEFAULT);
+	if (who_perm->who_deleg_perm_avl == NULL)
+		nomem();
+
+	who_perm->who_fsperm = fsperm;
+	who_perm->who_name = name;
+	who_perm->who_type = type;
+}
+
+/*
+ * Remove and free every permission node of *who_perm, then destroy its
+ * tree.  The who_perm_t itself is not freed.
+ */
+static inline void
+who_perm_fini(who_perm_t *who_perm)
+{
+	uu_avl_t *perms = who_perm->who_deleg_perm_avl;
+	deleg_perm_node_t *cur;
+	deleg_perm_node_t *following;
+
+	for (cur = uu_avl_first(perms); cur != NULL; cur = following) {
+		/* fetch the successor before 'cur' is unlinked and freed */
+		following = uu_avl_next(perms, cur);
+		uu_avl_remove(perms, cur);
+		free(cur);
+	}
+
+	uu_avl_destroy(perms);
+}
+
+/*
+ * Zero *fsperm, create its two empty "who" trees from the set's pools, and
+ * record the parent set and the filesystem name (pointer stored, not
+ * copied).
+ */
+static inline void
+fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname)
+{
+	uu_avl_pool_t *sc_pool = fspset->fsps_named_set_avl_pool;
+	uu_avl_pool_t *uge_pool = fspset->fsps_who_perm_avl_pool;
+
+	bzero(fsperm, sizeof (fs_perm_t));
+
+	fsperm->fsp_sc_avl = uu_avl_create(sc_pool, NULL, UU_DEFAULT);
+	if (fsperm->fsp_sc_avl == NULL)
+		nomem();
+
+	fsperm->fsp_uge_avl = uu_avl_create(uge_pool, NULL, UU_DEFAULT);
+	if (fsperm->fsp_uge_avl == NULL)
+		nomem();
+
+	fsperm->fsp_name = fsname;
+	fsperm->fsp_set = fspset;
+}
+
+/*
+ * Release everything owned by *fsperm: each who_perm in the sets/create
+ * tree and then in the user/group/everyone tree is finalized, unlinked and
+ * freed, after which both trees are destroyed.
+ */
+static inline void
+fs_perm_fini(fs_perm_t *fsperm)
+{
+	uu_avl_t *trees[2];
+	int t;
+
+	trees[0] = fsperm->fsp_sc_avl;
+	trees[1] = fsperm->fsp_uge_avl;
+
+	for (t = 0; t < 2; t++) {
+		who_perm_node_t *cur = uu_avl_first(trees[t]);
+
+		while (cur != NULL) {
+			/* fetch the successor before 'cur' goes away */
+			who_perm_node_t *following =
+			    uu_avl_next(trees[t], cur);
+
+			who_perm_fini(&cur->who_perm);
+			uu_avl_remove(trees[t], cur);
+			free(cur);
+			cur = following;
+		}
+	}
+
+	uu_avl_destroy(fsperm->fsp_sc_avl);
+	uu_avl_destroy(fsperm->fsp_uge_avl);
+}
+
+/*
+ * Record the permission 'name' in 'avl' and mark its locality.
+ *
+ * 'node' is initialized as a probe for (who_type, name).  When the tree has
+ * no equal entry the probe itself is inserted; otherwise the entry already
+ * in the tree is the one updated.  'locality' selects which flag is set:
+ * ZFS_DELEG_LOCAL sets dp_local, ZFS_DELEG_DESCENDENT sets dp_descend and
+ * ZFS_DELEG_NA sets neither.  Any other value trips an assertion.
+ *
+ * NOTE(review): when an equal entry already exists, 'node' is left for the
+ * caller to dispose of -- confirm callers do so.
+ */
+static void inline
+set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node,
+    zfs_deleg_who_type_t who_type, const char *name, char locality)
+{
+	uu_avl_index_t where = 0;
+	deleg_perm_node_t *existing;
+	deleg_perm_t *perm = &node->dpn_perm;
+
+	deleg_perm_init(perm, who_type, name);
+
+	existing = uu_avl_find(avl, node, NULL, &where);
+	if (existing != NULL)
+		perm = &existing->dpn_perm;
+	else
+		uu_avl_insert(avl, node, where);
+
+	if (locality == ZFS_DELEG_LOCAL)
+		perm->dp_local = B_TRUE;
+	else if (locality == ZFS_DELEG_DESCENDENT)
+		perm->dp_descend = B_TRUE;
+	else if (locality != ZFS_DELEG_NA)
+		assert(B_FALSE);	/* invalid locality */
+}
+
+/*
+ * Walk 'nvl', whose pairs are expected to be DATA_TYPE_BOOLEAN entries named
+ * after permissions, and record each one in who_perm's permission tree with
+ * the given 'locality'.  A node is allocated per pair and handed to
+ * set_deleg_perm_node().  Always returns 0.
+ */
+static inline int
+parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality)
+{
+	fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set;
+	uu_avl_t *perm_avl = who_perm->who_deleg_perm_avl;
+	zfs_deleg_who_type_t who_type = who_perm->who_type;
+	nvpair_t *pair;
+
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		const char *perm_name = nvpair_name(pair);
+		data_type_t pair_type = nvpair_type(pair);
+		uu_avl_pool_t *perm_pool = fspset->fsps_deleg_perm_avl_pool;
+		deleg_perm_node_t *perm_node;
+
+		perm_node = safe_malloc(sizeof (deleg_perm_node_t));
+
+		assert(pair_type == DATA_TYPE_BOOLEAN);
+
+		uu_avl_node_init(perm_node, &perm_node->dpn_avl_node,
+		    perm_pool);
+		set_deleg_perm_node(perm_avl, perm_node, who_type, perm_name,
+		    locality);
+	}
+
+	return (0);
+}
+
+/*
+ * Populate 'fsperm' from 'nvl', the delegation nvlist of one file system.
+ *
+ * Every pair name has the form "<type><locality>$<who>", where <type> is a
+ * zfs_deleg_who_type_t character, and its value is a nested nvlist of
+ * permission names.  Set and create-time entries go to fsp_sc_avl; user,
+ * group and everyone entries go to fsp_uge_avl.  For a newly seen user or
+ * group the numeric id in <who> is resolved with getpwuid()/getgrgid() and,
+ * when that succeeds, the name is saved in who_ug_name.
+ *
+ * Entries with an unrecognized <type> are skipped.  Returns 0 on success and
+ * -1 if a pair's value cannot be read as an nvlist.
+ */
+static inline int
+parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl)
+{
+	nvpair_t *nvp = NULL;
+	fs_perm_set_t *fspset = fsperm->fsp_set;
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		nvlist_t *nvl2 = NULL;
+		const char *name = nvpair_name(nvp);
+		uu_avl_t *avl = NULL;
+		uu_avl_pool_t *avl_pool = NULL;
+		zfs_deleg_who_type_t perm_type = name[0];
+		char perm_locality = name[1];
+		const char *perm_name = name + 3;
+		who_perm_t *who_perm = NULL;
+		who_perm_node_t *found_node = NULL;
+		who_perm_node_t *node = NULL;
+		uu_avl_index_t idx = 0;
+
+		assert('$' == name[2]);
+
+		if (nvpair_value_nvlist(nvp, &nvl2) != 0)
+			return (-1);
+
+		switch (perm_type) {
+		case ZFS_DELEG_CREATE:
+		case ZFS_DELEG_CREATE_SETS:
+		case ZFS_DELEG_NAMED_SET:
+		case ZFS_DELEG_NAMED_SET_SETS:
+			avl_pool = fspset->fsps_named_set_avl_pool;
+			avl = fsperm->fsp_sc_avl;
+			break;
+		case ZFS_DELEG_USER:
+		case ZFS_DELEG_USER_SETS:
+		case ZFS_DELEG_GROUP:
+		case ZFS_DELEG_GROUP_SETS:
+		case ZFS_DELEG_EVERYONE:
+		case ZFS_DELEG_EVERYONE_SETS:
+			avl_pool = fspset->fsps_who_perm_avl_pool;
+			avl = fsperm->fsp_uge_avl;
+			break;
+		default:
+			/*
+			 * Unknown who-type: there is no tree to file it
+			 * under, so skip the entry instead of handing a
+			 * NULL tree to libuutil.
+			 */
+			continue;
+		}
+
+		/* Build a probe node; it becomes the entry if none exists. */
+		node = safe_malloc(sizeof (who_perm_node_t));
+		who_perm = &node->who_perm;
+
+		uu_avl_node_init(node, &node->who_avl_node, avl_pool);
+		who_perm_init(who_perm, fsperm, perm_type, perm_name);
+
+		found_node = uu_avl_find(avl, node, NULL, &idx);
+		if (found_node == NULL) {
+			if (avl == fsperm->fsp_uge_avl) {
+				struct passwd *p = NULL;
+				struct group *g = NULL;
+				const char *nice_name = NULL;
+
+				switch (perm_type) {
+				case ZFS_DELEG_USER_SETS:
+				case ZFS_DELEG_USER:
+					p = getpwuid((uid_t)atoi(perm_name));
+					if (p != NULL)
+						nice_name = p->pw_name;
+					break;
+				case ZFS_DELEG_GROUP_SETS:
+				case ZFS_DELEG_GROUP:
+					g = getgrgid((gid_t)atoi(perm_name));
+					if (g != NULL)
+						nice_name = g->gr_name;
+					break;
+				default:
+					/* "everyone" has no id to resolve */
+					break;
+				}
+
+				if (nice_name != NULL)
+					(void) strlcpy(
+					    node->who_perm.who_ug_name,
+					    nice_name, 256);
+			}
+
+			uu_avl_insert(avl, node, idx);
+		} else {
+			/*
+			 * The principal is already present: release the
+			 * probe (and the tree who_perm_init() gave it)
+			 * rather than leaking it, and extend the existing
+			 * entry instead.
+			 */
+			who_perm_fini(who_perm);
+			uu_avl_node_fini(node, &node->who_avl_node, avl_pool);
+			free(node);
+			who_perm = &found_node->who_perm;
+		}
+
+		(void) parse_who_perm(who_perm, nvl2, perm_locality);
+	}
+
+	return (0);
+}
+
+/*
+ * Populate 'fspset' from 'nvl', whose pairs are named after file systems
+ * and carry that file system's delegation nvlist.  One fs_perm_node_t is
+ * created per pair, filled in by parse_fs_perm() and added to fsps_list.
+ *
+ * Returns 0 on success, or -1 if a pair's value cannot be read as an
+ * nvlist.  The value is checked before anything is allocated so the early
+ * return does not strand a half-built node.
+ */
+static inline int
+parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl)
+{
+	nvpair_t *nvp = NULL;
+	uu_avl_index_t idx = 0;
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		nvlist_t *nvl2 = NULL;
+		const char *fsname = nvpair_name(nvp);
+		data_type_t type = nvpair_type(nvp);
+		fs_perm_t *fsperm = NULL;
+		fs_perm_node_t *node = NULL;
+
+		assert(DATA_TYPE_NVLIST == type);
+
+		if (nvpair_value_nvlist(nvp, &nvl2) != 0)
+			return (-1);
+
+		node = safe_malloc(sizeof (fs_perm_node_t));
+		if (node == NULL)
+			nomem();
+
+		fsperm = &node->fspn_fsperm;
+
+		uu_list_node_init(node, &node->fspn_list_node,
+		    fspset->fsps_list_pool);
+
+		/*
+		 * NOTE(review): idx is the current node count rather than an
+		 * index obtained from libuutil -- confirm uu_list_insert()
+		 * accepts that for any list length.
+		 */
+		idx = uu_list_numnodes(fspset->fsps_list);
+		fs_perm_init(fsperm, fspset, fsname);
+
+		(void) parse_fs_perm(fsperm, nvl2);
+
+		uu_list_insert(fspset->fsps_list, node, idx);
+	}
+
+	return (0);
+}
+
+/*
+ * Return the localized note shown next to a delegated permission in the
+ * 'zfs allow' usage table, or the empty string when there is nothing to
+ * say about it.
+ *
+ * Permissions without a note return a plain "" instead of gettext(""):
+ * with GNU gettext the empty msgid looks up the catalog's header entry, so
+ * translating it would print catalog metadata rather than nothing.
+ */
+static inline const char *
+deleg_perm_comment(zfs_deleg_note_t note)
+{
+	const char *str = "";
+
+	switch (note) {
+	/* SUBCOMMANDS */
+	case ZFS_DELEG_NOTE_ALLOW:
+		str = gettext("Must also have the permission that is being"
+		    "\n\t\t\t\tallowed");
+		break;
+	case ZFS_DELEG_NOTE_CLONE:
+		str = gettext("Must also have the 'create' ability and 'mount'"
+		    "\n\t\t\t\tability in the origin file system");
+		break;
+	case ZFS_DELEG_NOTE_CREATE:
+		str = gettext("Must also have the 'mount' ability");
+		break;
+	case ZFS_DELEG_NOTE_DESTROY:
+		str = gettext("Must also have the 'mount' ability");
+		break;
+	case ZFS_DELEG_NOTE_DIFF:
+		str = gettext("Allows lookup of paths within a dataset;"
+		    "\n\t\t\t\tgiven an object number. Ordinary users need this"
+		    "\n\t\t\t\tin order to use zfs diff");
+		break;
+	case ZFS_DELEG_NOTE_HOLD:
+		str = gettext("Allows adding a user hold to a snapshot");
+		break;
+	case ZFS_DELEG_NOTE_MOUNT:
+		str = gettext("Allows mount/umount of ZFS datasets");
+		break;
+	case ZFS_DELEG_NOTE_PROMOTE:
+		str = gettext("Must also have the 'mount'\n\t\t\t\tand"
+		    " 'promote' ability in the origin file system");
+		break;
+	case ZFS_DELEG_NOTE_RECEIVE:
+		str = gettext("Must also have the 'mount' and 'create'"
+		    " ability");
+		break;
+	case ZFS_DELEG_NOTE_RELEASE:
+		str = gettext("Allows releasing a user hold which\n\t\t\t\t"
+		    "might destroy the snapshot");
+		break;
+	case ZFS_DELEG_NOTE_RENAME:
+		str = gettext("Must also have the 'mount' and 'create'"
+		    "\n\t\t\t\tability in the new parent");
+		break;
+	case ZFS_DELEG_NOTE_SHARE:
+		str = gettext("Allows sharing file systems over NFS or SMB"
+		    "\n\t\t\t\tprotocols");
+		break;
+	case ZFS_DELEG_NOTE_ROLLBACK:
+	case ZFS_DELEG_NOTE_SEND:
+	case ZFS_DELEG_NOTE_SNAPSHOT:
+		/* no note; deliberately not passed through gettext() */
+		str = "";
+		break;
+/*
+ *	case ZFS_DELEG_NOTE_VSCAN:
+ *		str = gettext("");
+ *		break;
+ */
+	/* OTHER */
+	case ZFS_DELEG_NOTE_GROUPQUOTA:
+		str = gettext("Allows accessing any groupquota@... property");
+		break;
+	case ZFS_DELEG_NOTE_GROUPUSED:
+		str = gettext("Allows reading any groupused@... property");
+		break;
+	case ZFS_DELEG_NOTE_USERPROP:
+		str = gettext("Allows changing any user property");
+		break;
+	case ZFS_DELEG_NOTE_USERQUOTA:
+		str = gettext("Allows accessing any userquota@... property");
+		break;
+	case ZFS_DELEG_NOTE_USERUSED:
+		str = gettext("Allows reading any userused@... property");
+		break;
+	default:
+		str = "";
+		break;
+	}
+
+	return (str);
+}
+
+/*
+ * Parsed command line of 'zfs allow' / 'zfs unallow'.  The boolean flags
+ * mirror the single-letter options; parse_allow_args() fills in the
+ * remaining fields from the positional arguments.
+ */
+struct allow_opts {
+ /* -l */
+ boolean_t local;
+ /* -d */
+ boolean_t descend;
+ /* -u */
+ boolean_t user;
+ /* -g */
+ boolean_t group;
+ /* -e, or the literal "everyone" given as the first argument */
+ boolean_t everyone;
+ /* -c */
+ boolean_t create;
+ /* -s */
+ boolean_t set;
+ boolean_t recursive; /* unallow only */
+ /* -h */
+ boolean_t prt_usage;
+
+ /* set when a dataset is the only argument: print its permissions */
+ boolean_t prt_perms;
+ /* first positional argument: who list, or "@setname" with -s */
+ char *who;
+ /* permission list argument; NULL when unallow was given none */
+ char *perms;
+ /* last positional argument */
+ const char *dataset;
+};
+
+/*
+ * qsort() comparator for an array of C strings: 'a' and 'b' each point at
+ * an element of that array, and the strings are ordered with strcmp().
+ */
+static inline int
+prop_cmp(const void *a, const void *b)
+{
+	const char *const *lhs = a;
+	const char *const *rhs = b;
+
+	return (strcmp(*lhs, *rhs));
+}
+
+/*
+ * Print the usage text for 'zfs allow' (or 'zfs unallow' when 'un' is set)
+ * and exit.
+ *
+ * The output lists the options, then every delegable permission with its
+ * type and note, then every visible, non-read-only property in sorted
+ * order.  When 'requested' is true the text goes to stdout and the exit
+ * status is 0; otherwise it goes to stderr and the status is 2.  A non-NULL
+ * 'msg' is appended as an error line.  This function does not return.
+ */
+static void
+allow_usage(boolean_t un, boolean_t requested, const char *msg)
+{
+	/* flat list of (option, description) pairs */
+	const char *opt_desc[] = {
+		"-h", gettext("show this help message and exit"),
+		"-l", gettext("set permission locally"),
+		"-d", gettext("set permission for descents"),
+		"-u", gettext("set permission for user"),
+		"-g", gettext("set permission for group"),
+		"-e", gettext("set permission for everyone"),
+		"-c", gettext("set create time permission"),
+		"-s", gettext("define permission set"),
+		/* unallow only */
+		"-r", gettext("remove permissions recursively"),
+	};
+	size_t unallow_size = sizeof (opt_desc) / sizeof (char *);
+	/* 'allow' omits the trailing "-r" pair */
+	size_t allow_size = unallow_size - 2;
+	size_t opt_limit = un ? unallow_size : allow_size;
+	const char *props[ZFS_NUM_PROPS];
+	size_t i;
+	int n;
+	size_t count = 0;
+	FILE *fp = requested ? stdout : stderr;
+	zprop_desc_t *pdtbl = zfs_prop_get_table();
+	const char *fmt = gettext("%-16s %-14s\t%s\n");
+
+	(void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW :
+	    HELP_ALLOW));
+	(void) fprintf(fp, gettext("Options:\n"));
+	for (i = 0; i + 1 < opt_limit; i += 2) {
+		(void) fprintf(fp, gettext(" %-10s %s\n"), opt_desc[i],
+		    opt_desc[i + 1]);
+	}
+
+	(void) fprintf(fp, gettext("\nThe following permissions are "
+	    "supported:\n\n"));
+	(void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"),
+	    gettext("NOTES"));
+	for (n = 0; n < ZFS_NUM_DELEG_NOTES; n++) {
+		const char *perm_name = zfs_deleg_perm_tbl[n].z_perm;
+		zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[n].z_note;
+		const char *perm_type = deleg_perm_type(perm_note);
+		const char *perm_comment = deleg_perm_comment(perm_note);
+		(void) fprintf(fp, fmt, perm_name, perm_type, perm_comment);
+	}
+
+	/*
+	 * Collect the delegable properties.  At most ZFS_NUM_PROPS entries
+	 * are stored and the array is always used together with 'count', so
+	 * no terminating NULL is written (it could land one past the end).
+	 */
+	for (n = 0; n < ZFS_NUM_PROPS; n++) {
+		zprop_desc_t *pd = &pdtbl[n];
+		if (pd->pd_visible != B_TRUE)
+			continue;
+
+		if (pd->pd_attr == PROP_READONLY)
+			continue;
+
+		props[count++] = pd->pd_name;
+	}
+
+	qsort(props, count, sizeof (char *), prop_cmp);
+
+	for (i = 0; i < count; i++)
+		(void) fprintf(fp, fmt, props[i], gettext("property"), "");
+
+	if (msg != NULL)
+		(void) fprintf(fp, gettext("\nzfs: error: %s"), msg);
+
+	exit(requested ? 0 : 2);
+}
+
+/*
+ * Pick the permission list and the dataset out of the positional arguments.
+ *
+ * With exactly 'expected_argc' arguments the second-to-last one is stored
+ * in *permsp.  For unallow ('un' set) one argument fewer is also accepted,
+ * in which case *permsp becomes NULL.  Any other count is reported through
+ * allow_usage(), which exits.  The last argument is returned as the dataset.
+ */
+static inline const char *
+munge_args(int argc, char **argv, boolean_t un, size_t expected_argc,
+    char **permsp)
+{
+	size_t nargs = argc;
+
+	if (nargs == expected_argc) {
+		*permsp = argv[argc - 2];
+	} else if (un && nargs == expected_argc - 1) {
+		*permsp = NULL;
+	} else {
+		allow_usage(un, B_FALSE,
+		    gettext("wrong number of parameters\n"));
+	}
+
+	return (argv[argc - 1]);
+}
+
+/*
+ * Validate the option combination already stored in 'opts' and fill in its
+ * who / perms / dataset fields from the remaining positional arguments.
+ *
+ * Invalid combinations or argument counts are reported through
+ * allow_usage() or usage().  A lone dataset argument selects "print the
+ * permissions" mode (prt_perms).  When neither -l nor -d was given, both
+ * are turned on.
+ */
+static void
+parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts)
+{
+	int who_flags = opts->user + opts->group + opts->everyone;
+	int kind_flags = opts->create + opts->set + who_flags;
+	int scoped_flags = kind_flags + opts->local + opts->descend;
+	int any_flags = scoped_flags;
+
+	if (un)
+		any_flags += opts->recursive;
+
+	if (who_flags > 1) {
+		allow_usage(un, B_FALSE,
+		    gettext("-u, -g, and -e are mutually exclusive\n"));
+	}
+
+	if (opts->prt_usage) {
+		/* -h is only honoured when it stands completely alone */
+		if (argc == 0 && any_flags == 0)
+			allow_usage(un, B_TRUE, NULL);
+		else
+			usage(B_FALSE);
+	}
+
+	if (opts->set) {
+		if (kind_flags > 1) {
+			allow_usage(un, B_FALSE,
+			    gettext("invalid options combined with -s\n"));
+		}
+
+		opts->dataset = munge_args(argc, argv, un, 3, &opts->perms);
+		if (argv[0][0] != '@') {
+			allow_usage(un, B_FALSE,
+			    gettext("invalid set name: missing '@' prefix\n"));
+		}
+		opts->who = argv[0];
+	} else if (opts->create) {
+		if (scoped_flags > 1) {
+			allow_usage(un, B_FALSE,
+			    gettext("invalid options combined with -c\n"));
+		}
+		opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+	} else if (opts->everyone) {
+		if (kind_flags > 1) {
+			allow_usage(un, B_FALSE,
+			    gettext("invalid options combined with -e\n"));
+		}
+		opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+	} else if (who_flags == 0 && argc > 0 &&
+	    strcmp(argv[0], "everyone") == 0) {
+		/* the word "everyone" in place of a who list acts like -e */
+		opts->everyone = B_TRUE;
+		argc--;
+		argv++;
+		opts->dataset = munge_args(argc, argv, un, 2, &opts->perms);
+	} else if (argc == 1) {
+		opts->prt_perms = B_TRUE;
+		opts->dataset = argv[argc-1];
+	} else {
+		opts->dataset = munge_args(argc, argv, un, 3, &opts->perms);
+		opts->who = argv[0];
+	}
+
+	if (!opts->local && !opts->descend) {
+		opts->local = B_TRUE;
+		opts->descend = B_TRUE;
+	}
+}
+
+/*
+ * Add the delegation entries for one principal to 'top_nvl'.
+ *
+ * 'type' selects the principal kind; 'who' is its name or id (NULL for
+ * create-time and everyone entries).  Keys have the form
+ * "<type><locality>$<who>".  Named-set and create-time entries use the
+ * single locality ZFS_DELEG_NA; user, group and everyone entries get one
+ * key per requested locality ('local' and/or 'descend').
+ *
+ * With a non-NULL 'perms' (a comma separated list, restored to its original
+ * contents before returning) names starting with '@' are filed under the
+ * "sets" variant of the type and all others under the base type, each as a
+ * nested nvlist.  With a NULL 'perms' a bare boolean is added for both
+ * variants.
+ */
+static void
+store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend,
+    const char *who, char *perms, nvlist_t *top_nvl)
+{
+	int i;
+	char ld[2] = { '\0', '\0' };
+	char who_buf[ZFS_MAXNAMELEN+32];
+	char base_type = '\0';
+	char set_type = '\0';
+	boolean_t uses_locality = B_FALSE;
+	const char *who_str = (who != NULL) ? who : "";
+	nvlist_t *base_nvl = NULL;
+	nvlist_t *set_nvl = NULL;
+	nvlist_t *nvl;
+
+	if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0)
+		nomem();
+	if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0)
+		nomem();
+
+	switch (type) {
+	case ZFS_DELEG_NAMED_SET_SETS:
+	case ZFS_DELEG_NAMED_SET:
+		set_type = ZFS_DELEG_NAMED_SET_SETS;
+		base_type = ZFS_DELEG_NAMED_SET;
+		ld[0] = ZFS_DELEG_NA;
+		break;
+	case ZFS_DELEG_CREATE_SETS:
+	case ZFS_DELEG_CREATE:
+		set_type = ZFS_DELEG_CREATE_SETS;
+		base_type = ZFS_DELEG_CREATE;
+		ld[0] = ZFS_DELEG_NA;
+		break;
+	case ZFS_DELEG_USER_SETS:
+	case ZFS_DELEG_USER:
+		set_type = ZFS_DELEG_USER_SETS;
+		base_type = ZFS_DELEG_USER;
+		uses_locality = B_TRUE;
+		break;
+	case ZFS_DELEG_GROUP_SETS:
+	case ZFS_DELEG_GROUP:
+		set_type = ZFS_DELEG_GROUP_SETS;
+		base_type = ZFS_DELEG_GROUP;
+		uses_locality = B_TRUE;
+		break;
+	case ZFS_DELEG_EVERYONE_SETS:
+	case ZFS_DELEG_EVERYONE:
+		set_type = ZFS_DELEG_EVERYONE_SETS;
+		base_type = ZFS_DELEG_EVERYONE;
+		uses_locality = B_TRUE;
+		break;
+	default:
+		/* unknown type: ld[] stays empty, so nothing is added */
+		break;
+	}
+
+	if (uses_locality) {
+		if (local)
+			ld[0] = ZFS_DELEG_LOCAL;
+		if (descend)
+			ld[1] = ZFS_DELEG_DESCENDENT;
+	}
+
+	if (perms != NULL) {
+		char *curr = perms;
+		char *end = curr + strlen(perms);
+
+		/* split the list in place, sorting names by '@' prefix */
+		while (curr < end) {
+			char *delim = strchr(curr, ',');
+			if (delim == NULL)
+				delim = end;
+			else
+				*delim = '\0';
+
+			if (curr[0] == '@')
+				nvl = set_nvl;
+			else
+				nvl = base_nvl;
+
+			(void) nvlist_add_boolean(nvl, curr);
+			if (delim != end)
+				*delim = ',';
+			curr = delim + 1;
+		}
+
+		for (i = 0; i < 2; i++) {
+			char locality = ld[i];
+			if (locality == 0)
+				continue;
+
+			if (!nvlist_empty(base_nvl)) {
+				(void) snprintf(who_buf, sizeof (who_buf),
+				    "%c%c$%s", base_type, locality, who_str);
+				(void) nvlist_add_nvlist(top_nvl, who_buf,
+				    base_nvl);
+			}
+
+			if (!nvlist_empty(set_nvl)) {
+				(void) snprintf(who_buf, sizeof (who_buf),
+				    "%c%c$%s", set_type, locality, who_str);
+				(void) nvlist_add_nvlist(top_nvl, who_buf,
+				    set_nvl);
+			}
+		}
+	} else {
+		for (i = 0; i < 2; i++) {
+			char locality = ld[i];
+			if (locality == 0)
+				continue;
+
+			(void) snprintf(who_buf, sizeof (who_buf),
+			    "%c%c$%s", base_type, locality, who_str);
+			(void) nvlist_add_boolean(top_nvl, who_buf);
+
+			(void) snprintf(who_buf, sizeof (who_buf),
+			    "%c%c$%s", set_type, locality, who_str);
+			(void) nvlist_add_boolean(top_nvl, who_buf);
+		}
+	}
+
+	/*
+	 * nvlist_add_nvlist() stores a copy, so the scratch lists are still
+	 * ours to release.
+	 */
+	nvlist_free(base_nvl);
+	nvlist_free(set_nvl);
+}
+
+/*
+ * Build in *nvlp the nvlist describing the change requested by 'opts'.
+ *
+ * Set (-s), create-time (-c) and everyone (-e) requests produce a single
+ * store_allow_perm() call.  Otherwise opts->who is a comma separated list
+ * of user or group names or numeric ids; each element is resolved through
+ * the passwd/group databases (restricted to one of them by -u or -g,
+ * users first when neither is given) and stored under its numeric id.
+ *
+ * An element that cannot be resolved -- including an empty one -- is
+ * reported through allow_usage() as an error, which exits.  opts->who is
+ * split in place while parsing and restored afterwards.  Returns 0.
+ */
+static int
+construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp)
+{
+	if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0)
+		nomem();
+
+	if (opts->set) {
+		store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local,
+		    opts->descend, opts->who, opts->perms, *nvlp);
+	} else if (opts->create) {
+		store_allow_perm(ZFS_DELEG_CREATE, opts->local,
+		    opts->descend, NULL, opts->perms, *nvlp);
+	} else if (opts->everyone) {
+		store_allow_perm(ZFS_DELEG_EVERYONE, opts->local,
+		    opts->descend, NULL, opts->perms, *nvlp);
+	} else {
+		char *curr = opts->who;
+		char *end = curr + strlen(curr);
+		/* -u limits the lookup to users, -g to groups */
+		boolean_t try_user = !opts->group;
+		boolean_t try_group = !opts->user;
+
+		while (curr < end) {
+			zfs_deleg_who_type_t who_type = ZFS_DELEG_USER;
+			char *endch;
+			char *delim = strchr(curr, ',');
+			char errbuf[256];
+			char id[64];
+			struct passwd *p = NULL;
+			struct group *g = NULL;
+			boolean_t numeric;
+			uid_t rid;
+
+			if (delim == NULL)
+				delim = end;
+			else
+				*delim = '\0';
+
+			rid = (uid_t)strtol(curr, &endch, 0);
+			/*
+			 * Only a non-empty, fully consumed token is a
+			 * numeric id; an empty token must not be read as
+			 * id 0.
+			 */
+			numeric = (endch != curr && *endch == '\0');
+
+			if (try_user)
+				p = numeric ? getpwuid(rid) : getpwnam(curr);
+			if (p == NULL && try_group)
+				g = numeric ? getgrgid(rid) : getgrnam(curr);
+
+			if (p != NULL) {
+				who_type = ZFS_DELEG_USER;
+				rid = p->pw_uid;
+			} else if (g != NULL) {
+				who_type = ZFS_DELEG_GROUP;
+				rid = g->gr_gid;
+			} else {
+				if (opts->user) {
+					(void) snprintf(errbuf,
+					    sizeof (errbuf),
+					    gettext("invalid user %s"), curr);
+				} else if (opts->group) {
+					(void) snprintf(errbuf,
+					    sizeof (errbuf),
+					    gettext("invalid group %s"), curr);
+				} else {
+					(void) snprintf(errbuf,
+					    sizeof (errbuf),
+					    gettext("invalid user/group %s"),
+					    curr);
+				}
+				/*
+				 * This is an error, not a help request:
+				 * report on stderr with a failing status.
+				 */
+				allow_usage(un, B_FALSE, errbuf);
+			}
+
+			(void) snprintf(id, sizeof (id), "%u", rid);
+
+			store_allow_perm(who_type, opts->local,
+			    opts->descend, id, opts->perms, *nvlp);
+
+			if (delim != end)
+				*delim = ',';
+			curr = delim + 1;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Print the permission sets and create-time permissions held in 'who_avl',
+ * one line per entry: the entry's name (if it has one) followed by its
+ * comma separated permissions.
+ *
+ * A section title is printed whenever the weight of the entry type
+ * changes.  The title is chosen from the entry's type rather than by
+ * position, so a dataset that only has create-time permissions is not
+ * headed "Permission sets:".
+ */
+static void
+print_set_creat_perms(uu_avl_t *who_avl)
+{
+	const char *set_title = gettext("Permission sets:\n");
+	const char *create_title = gettext("Create time permissions:\n");
+	who_perm_node_t *who_node = NULL;
+	int prev_weight = -1;
+
+	for (who_node = uu_avl_first(who_avl); who_node != NULL;
+	    who_node = uu_avl_next(who_avl, who_node)) {
+		uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl;
+		zfs_deleg_who_type_t who_type = who_node->who_perm.who_type;
+		const char *who_name = who_node->who_perm.who_name;
+		int weight = who_type2weight(who_type);
+		boolean_t first = B_TRUE;
+		deleg_perm_node_t *deleg_node;
+
+		if (prev_weight != weight) {
+			const char *title = NULL;
+
+			switch (who_type) {
+			case ZFS_DELEG_NAMED_SET_SETS:
+			case ZFS_DELEG_NAMED_SET:
+				title = set_title;
+				break;
+			case ZFS_DELEG_CREATE_SETS:
+			case ZFS_DELEG_CREATE:
+				title = create_title;
+				break;
+			default:
+				/* no section title for other entry types */
+				break;
+			}
+
+			if (title != NULL)
+				(void) printf("%s", title);
+			prev_weight = weight;
+		}
+
+		if (who_name == NULL || who_name[0] == '\0')
+			(void) printf("\t");
+		else
+			(void) printf("\t%s ", who_name);
+
+		for (deleg_node = uu_avl_first(avl); deleg_node != NULL;
+		    deleg_node = uu_avl_next(avl, deleg_node)) {
+			if (first) {
+				(void) printf("%s",
+				    deleg_node->dpn_perm.dp_name);
+				first = B_FALSE;
+			} else
+				(void) printf(",%s",
+				    deleg_node->dpn_perm.dp_name);
+		}
+
+		(void) printf("\n");
+	}
+}
+
+/*
+ * Print the user / group / everyone entries of 'who_avl' whose permissions
+ * have exactly the given 'local' and 'descend' flags.
+ *
+ * 'title' is printed once, before the first matching entry.  Each matching
+ * principal gets one line: its kind, its name, and the comma separated
+ * permissions.  The resolved user or group name is preferred; when none
+ * was recorded the stored (numeric) name is shown instead.
+ */
+static void inline
+print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend,
+    const char *title)
+{
+	who_perm_node_t *who_node = NULL;
+	boolean_t prt_title = B_TRUE;
+	uu_avl_walk_t *walk;
+
+	if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL)
+		nomem();
+
+	while ((who_node = uu_avl_walk_next(walk)) != NULL) {
+		const char *who_name = who_node->who_perm.who_name;
+		const char *nice_who_name = who_node->who_perm.who_ug_name;
+		uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl;
+		zfs_deleg_who_type_t who_type = who_node->who_perm.who_type;
+		/* an empty resolved name means the lookup found nothing */
+		boolean_t have_nice_name =
+		    (nice_who_name != NULL && nice_who_name[0] != '\0');
+		char delim = ' ';
+		deleg_perm_node_t *deleg_node;
+		boolean_t prt_who = B_TRUE;
+
+		for (deleg_node = uu_avl_first(avl);
+		    deleg_node != NULL;
+		    deleg_node = uu_avl_next(avl, deleg_node)) {
+			if (local != deleg_node->dpn_perm.dp_local ||
+			    descend != deleg_node->dpn_perm.dp_descend)
+				continue;
+
+			if (prt_who) {
+				const char *who = "";
+
+				if (prt_title) {
+					prt_title = B_FALSE;
+					(void) printf("%s", title);
+				}
+
+				switch (who_type) {
+				case ZFS_DELEG_USER_SETS:
+				case ZFS_DELEG_USER:
+					who = gettext("user");
+					if (have_nice_name)
+						who_name = nice_who_name;
+					break;
+				case ZFS_DELEG_GROUP_SETS:
+				case ZFS_DELEG_GROUP:
+					who = gettext("group");
+					if (have_nice_name)
+						who_name = nice_who_name;
+					break;
+				case ZFS_DELEG_EVERYONE_SETS:
+				case ZFS_DELEG_EVERYONE:
+					who = gettext("everyone");
+					who_name = NULL;
+					break;
+				default:
+					break;
+				}
+
+				prt_who = B_FALSE;
+				if (who_name == NULL)
+					(void) printf("\t%s", who);
+				else
+					(void) printf("\t%s %s", who, who_name);
+			}
+
+			(void) printf("%c%s", delim,
+			    deleg_node->dpn_perm.dp_name);
+			delim = ',';
+		}
+
+		/* finish the line only if something was printed on it */
+		if (!prt_who)
+			(void) printf("\n");
+	}
+
+	uu_avl_walk_end(walk);
+}
+
+/*
+ * Print every file system in 'fspset': a banner line padded with dashes to
+ * 70 columns, then its permission sets and create-time permissions, then
+ * its local, descendent and local+descendent delegations.
+ */
+static void
+print_fs_perms(fs_perm_set_t *fspset)
+{
+	fs_perm_node_t *node = NULL;
+	char buf[ZFS_MAXNAMELEN+32];
+
+	for (node = uu_list_first(fspset->fsps_list); node != NULL;
+	    node = uu_list_next(fspset->fsps_list, node)) {
+		uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl;
+		uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl;
+		int left = 0;
+
+		(void) snprintf(buf, sizeof (buf),
+		    gettext("---- Permissions on %s "),
+		    node->fspn_fsperm.fsp_name);
+		/* the banner holds a dataset name: never use it as a format */
+		(void) printf("%s", buf);
+		left = 70 - (int)strlen(buf);
+		while (left-- > 0)
+			(void) printf("-");
+		(void) printf("\n");
+
+		print_set_creat_perms(sc_avl);
+		print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE,
+		    gettext("Local permissions:\n"));
+		print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE,
+		    gettext("Descendent permissions:\n"));
+		print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE,
+		    gettext("Local+Descendent permissions:\n"));
+	}
+}
+
+/*
+ * Parsed delegation state of the dataset being examined; set up with
+ * fs_perm_set_init() and released with fs_perm_set_fini() by the
+ * allow/unallow implementation.
+ */
+static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL };
+
+/*
+ * Callback argument for set_deleg_perms(): both members are passed
+ * unchanged to zfs_set_fsacl() for every dataset visited.
+ */
+struct deleg_perms {
+ boolean_t un;
+ nvlist_t *nvl;
+};
+
+/*
+ * Iteration callback: apply the change described by 'data' (a
+ * struct deleg_perms) to 'zhp' when it is a file system or a volume.
+ * Other dataset types are left alone and reported as success (0).
+ */
+static int
+set_deleg_perms(zfs_handle_t *zhp, void *data)
+{
+	struct deleg_perms *args = data;
+
+	switch (zfs_get_type(zhp)) {
+	case ZFS_TYPE_FILESYSTEM:
+	case ZFS_TYPE_VOLUME:
+		return (zfs_set_fsacl(zhp, args->un, args->nvl));
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Shared implementation of 'zfs allow' ('un' false) and 'zfs unallow'
+ * ('un' true).
+ *
+ * Parses the options, opens the dataset, reads and parses its current
+ * delegations, and then either prints them (when only a dataset was given)
+ * or applies the requested change with zfs_set_fsacl().  For
+ * 'unallow -r' the change is also applied to the descendent file systems.
+ *
+ * Returns 0 on success, 1 if a libzfs operation fails and -1 if the
+ * dataset cannot be opened.
+ */
+static int
+zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un)
+{
+	zfs_handle_t *zhp;
+	nvlist_t *perm_nvl = NULL;
+	nvlist_t *update_perm_nvl = NULL;
+	int error = 1;
+	int c;
+	struct allow_opts opts = { 0 };
+
+	/* -r is accepted by unallow only */
+	const char *optstr = un ? "ldugecsrh" : "ldugecsh";
+
+	/* check opts */
+	while ((c = getopt(argc, argv, optstr)) != -1) {
+		switch (c) {
+		case 'l':
+			opts.local = B_TRUE;
+			break;
+		case 'd':
+			opts.descend = B_TRUE;
+			break;
+		case 'u':
+			opts.user = B_TRUE;
+			break;
+		case 'g':
+			opts.group = B_TRUE;
+			break;
+		case 'e':
+			opts.everyone = B_TRUE;
+			break;
+		case 's':
+			opts.set = B_TRUE;
+			break;
+		case 'c':
+			opts.create = B_TRUE;
+			break;
+		case 'r':
+			opts.recursive = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case 'h':
+			opts.prt_usage = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check arguments */
+	parse_allow_args(argc, argv, un, &opts);
+
+	/* try to open the dataset */
+	if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM))
+	    == NULL) {
+		(void) fprintf(stderr, "Failed to open Dataset *%s*\n",
+		    opts.dataset);
+		return (-1);
+	}
+
+	if (zfs_get_fsacl(zhp, &perm_nvl) != 0)
+		goto cleanup2;
+
+	fs_perm_set_init(&fs_perm_set);
+	if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) {
+		(void) fprintf(stderr, "Failed to parse fsacl permissions\n");
+		goto cleanup1;
+	}
+
+	if (opts.prt_perms)
+		print_fs_perms(&fs_perm_set);
+	else {
+		(void) construct_fsacl_list(un, &opts, &update_perm_nvl);
+		if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0)
+			goto cleanup0;
+
+		if (un && opts.recursive) {
+			struct deleg_perms data = { un, update_perm_nvl };
+			if (zfs_iter_filesystems(zhp, set_deleg_perms,
+			    &data) != 0)
+				goto cleanup0;
+		}
+	}
+
+	error = 0;
+
+cleanup0:
+	if (update_perm_nvl != NULL)
+		nvlist_free(update_perm_nvl);
+cleanup1:
+	/*
+	 * The parsed set keeps pointers to names stored inside perm_nvl, so
+	 * it is torn down first; perm_nvl is released on this path too, so
+	 * a parse failure no longer leaks it.
+	 */
+	fs_perm_set_fini(&fs_perm_set);
+	nvlist_free(perm_nvl);
+cleanup2:
+	zfs_close(zhp);
+
+	return (error);
+}
+
+/*
+ * zfs allow
+ *
+ * -l Set permission locally
+ * -d Set permission for descendents
+ * -u Set permission for user
+ * -g Set permission for group
+ * -e Set permission for everyone
+ * -c Set create time permission
+ * -s Define permission set
+ * -h Show the help message
+ *
+ * Delegate permissions on a dataset, or print the current delegations when
+ * a dataset is the only argument. Runs the shared allow/unallow
+ * implementation with 'un' set to B_FALSE.
+ */
+static int
+zfs_do_allow(int argc, char **argv)
+{
+ return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
+}
+
+/*
+ * zfs unallow
+ *
+ * Accepts the same options as 'zfs allow', plus:
+ *
+ * -r Remove permissions recursively
+ *
+ * Remove delegated permissions from a dataset. Runs the shared
+ * allow/unallow implementation with 'un' set to B_TRUE.
+ */
+static int
+zfs_do_unallow(int argc, char **argv)
+{
+ return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE));
+}
+
+/*
+ * Shared implementation of 'zfs hold' ('holding' true) and 'zfs release'
+ * ('holding' false).
+ *
+ * After the options, the first argument is the tag and every following
+ * argument names a snapshot ("dataset@snap").  Each snapshot's parent
+ * dataset is opened and the hold is placed or released on it.  Arguments
+ * that are not snapshots, are too long, or fail in libzfs are counted as
+ * errors and do not stop the remaining arguments from being processed.
+ *
+ * Returns 0 if every argument succeeded and 1 otherwise.
+ */
+static int
+zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
+{
+	int errors = 0;
+	int i;
+	const char *tag;
+	boolean_t recursive = B_FALSE;
+	boolean_t temphold = B_FALSE;
+	/* -t is accepted by hold only */
+	const char *opts = holding ? "rt" : "r";
+	int c;
+
+	/* check options */
+	while ((c = getopt(argc, argv, opts)) != -1) {
+		switch (c) {
+		case 'r':
+			recursive = B_TRUE;
+			break;
+		case 't':
+			temphold = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 2)
+		usage(B_FALSE);
+
+	tag = argv[0];
+	--argc;
+	++argv;
+
+	if (holding && tag[0] == '.') {
+		/* tags starting with '.' are reserved for libzfs */
+		(void) fprintf(stderr, gettext("tag may not start with '.'\n"));
+		usage(B_FALSE);
+	}
+
+	for (i = 0; i < argc; ++i) {
+		zfs_handle_t *zhp;
+		char parent[ZFS_MAXNAMELEN];
+		const char *delim;
+		char *path = argv[i];
+		size_t parent_len;
+
+		delim = strchr(path, '@');
+		if (delim == NULL) {
+			(void) fprintf(stderr,
+			    gettext("'%s' is not a snapshot\n"), path);
+			++errors;
+			continue;
+		}
+
+		/*
+		 * The part before '@' comes straight from the command line;
+		 * make sure it fits before copying it into 'parent'.
+		 */
+		parent_len = (size_t)(delim - path);
+		if (parent_len >= sizeof (parent)) {
+			(void) fprintf(stderr,
+			    gettext("'%s': dataset name is too long\n"), path);
+			++errors;
+			continue;
+		}
+		(void) memcpy(parent, path, parent_len);
+		parent[parent_len] = '\0';
+
+		zhp = zfs_open(g_zfs, parent,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL) {
+			++errors;
+			continue;
+		}
+		if (holding) {
+			if (zfs_hold(zhp, delim+1, tag, recursive,
+			    temphold, B_FALSE, -1, 0, 0) != 0)
+				++errors;
+		} else {
+			if (zfs_release(zhp, delim+1, tag, recursive) != 0)
+				++errors;
+		}
+		zfs_close(zhp);
+	}
+
+	return (errors != 0);
+}
+
+/*
+ * zfs hold [-r] [-t] <tag> <snap> ...
+ *
+ * -r Recursively hold
+ * -t Temporary hold (hidden option)
+ *
+ * Apply a user-hold with the given tag to the list of snapshots.
+ * Runs the shared hold/release implementation with 'holding' set to B_TRUE.
+ */
+static int
+zfs_do_hold(int argc, char **argv)
+{
+ return (zfs_do_hold_rele_impl(argc, argv, B_TRUE));
+}
+
+/*
+ * zfs release [-r] <tag> <snap> ...
+ *
+ * -r Recursively release
+ *
+ * Release a user-hold with the given tag from the list of snapshots.
+ * Runs the shared hold/release implementation with 'holding' set to B_FALSE.
+ */
+static int
+zfs_do_release(int argc, char **argv)
+{
+ return (zfs_do_hold_rele_impl(argc, argv, B_FALSE));
+}
+
+/*
+ * State shared between zfs_do_holds() and holds_callback().
+ */
+typedef struct holds_cbdata {
+ /* when set, the callback only accepts snapshots named cb_snapname */
+ boolean_t cb_recursive;
+ /* snapshot name (the part after '@') of the current argument */
+ const char *cb_snapname;
+ /* nvlist that collects the holds of every dataset visited */
+ nvlist_t **cb_nvlp;
+ /* longest dataset name seen so far */
+ size_t cb_max_namelen;
+ /* longest hold tag seen so far */
+ size_t cb_max_taglen;
+} holds_cbdata_t;
+
+#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y"
+#define DATETIME_BUF_LEN (32)
+/*
+ * Print the holds collected in 'nvl' as NAME / TAG / TIMESTAMP rows.
+ *
+ * 'nvl' maps a dataset name to an nvlist that maps each hold tag to a
+ * uint64 time value.  'nwidth' and 'tagwidth' are the column widths for
+ * the name and the tag.  In scripted mode the header is omitted and the
+ * columns are separated by a single tab.
+ */
+static void
+print_holds(boolean_t scripted, size_t nwidth, size_t tagwidth, nvlist_t *nvl)
+{
+	int i;
+	nvpair_t *nvp = NULL;
+	char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" };
+	const char *col;
+	/* a '*' field width must be passed as an int, not a size_t */
+	int name_w = (int)nwidth;
+	int tag_w = (int)tagwidth;
+
+	if (!scripted) {
+		for (i = 0; i < 3; i++) {
+			col = gettext(hdr_cols[i]);
+			if (i < 2)
+				(void) printf("%-*s ", i ? tag_w : name_w,
+				    col);
+			else
+				(void) printf("%s\n", col);
+		}
+	}
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		char *zname = nvpair_name(nvp);
+		nvlist_t *nvl2;
+		nvpair_t *nvp2 = NULL;
+		(void) nvpair_value_nvlist(nvp, &nvl2);
+		while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) {
+			char tsbuf[DATETIME_BUF_LEN];
+			char *tagname = nvpair_name(nvp2);
+			uint64_t val = 0;
+			time_t when;
+			struct tm t;
+			char sep = scripted ? '\t' : ' ';
+			int sepnum = scripted ? 1 : 2;
+
+			(void) nvpair_value_uint64(nvp2, &val);
+			when = (time_t)val;
+			/* print an empty timestamp if it cannot be formatted */
+			if (localtime_r(&when, &t) == NULL ||
+			    strftime(tsbuf, DATETIME_BUF_LEN,
+			    gettext(STRFTIME_FMT_STR), &t) == 0)
+				tsbuf[0] = '\0';
+
+			(void) printf("%-*s%*c%-*s%*c%s\n", name_w, zname,
+			    sepnum, sep, tag_w, tagname, sepnum, sep, tsbuf);
+		}
+	}
+}
+
+/*
+ * zfs_for_each() callback for 'zfs holds': fetch the holds of 'zhp' and
+ * file them under the dataset's name in the nvlist referenced by the
+ * holds_cbdata_t passed as 'data', keeping track of the longest dataset
+ * name and tag seen.
+ *
+ * In recursive mode only snapshots whose name matches cb_snapname are
+ * recorded; everything else is skipped with a return of 0.  Returns -1 if
+ * the holds cannot be read, otherwise the result of adding them.
+ */
+static int
+holds_callback(zfs_handle_t *zhp, void *data)
+{
+	holds_cbdata_t *cbp = data;
+	nvlist_t *top_nvl = *cbp->cb_nvlp;
+	nvlist_t *nvl = NULL;
+	nvpair_t *nvp = NULL;
+	const char *zname = zfs_get_name(zhp);
+	size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN);
+	int ret;
+
+	if (cbp->cb_recursive) {
+		const char *snapname;
+		const char *delim = strchr(zname, '@');
+		if (delim == NULL)
+			return (0);
+
+		snapname = delim + 1;
+		if (strcmp(cbp->cb_snapname, snapname))
+			return (0);
+	}
+
+	if (zfs_get_holds(zhp, &nvl) != 0)
+		return (-1);
+
+	if (znamelen > cbp->cb_max_namelen)
+		cbp->cb_max_namelen = znamelen;
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		const char *tag = nvpair_name(nvp);
+		size_t taglen = strnlen(tag, MAXNAMELEN);
+		if (taglen > cbp->cb_max_taglen)
+			cbp->cb_max_taglen = taglen;
+	}
+
+	/*
+	 * nvlist_add_nvlist() stores a copy, so the list handed back by
+	 * zfs_get_holds() must still be released here.
+	 */
+	ret = nvlist_add_nvlist(top_nvl, zname, nvl);
+	nvlist_free(nvl);
+
+	return (ret);
+}
+
+/*
+ * zfs holds [-r] [-H] <snap> ...
+ *
+ * -r Recursively list the holds of the same-named snapshot on descendents
+ * -H Scripted mode: no header, tab separated columns
+ *
+ * List the user holds on the given snapshots.  Arguments that are not
+ * snapshots are reported and counted as errors; the remaining ones are
+ * still processed.  Returns 0 if everything succeeded and 1 otherwise.
+ */
+static int
+zfs_do_holds(int argc, char **argv)
+{
+	int errors = 0;
+	int c;
+	int i;
+	boolean_t scripted = B_FALSE;
+	boolean_t recursive = B_FALSE;
+	const char *opts = "rH";
+	nvlist_t *nvl;
+
+	int types = ZFS_TYPE_SNAPSHOT;
+	holds_cbdata_t cb = { 0 };
+
+	int limit = 0;
+	int ret;
+	int flags = 0;
+
+	/* check options */
+	while ((c = getopt(argc, argv, opts)) != -1) {
+		switch (c) {
+		case 'r':
+			recursive = B_TRUE;
+			break;
+		case 'H':
+			scripted = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	if (recursive) {
+		types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
+		flags |= ZFS_ITER_RECURSE;
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 1)
+		usage(B_FALSE);
+
+	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
+		nomem();
+
+	for (i = 0; i < argc; ++i) {
+		char *snapshot = argv[i];
+		const char *delim;
+		const char *snapname;
+
+		delim = strchr(snapshot, '@');
+		if (delim == NULL) {
+			(void) fprintf(stderr,
+			    gettext("'%s' is not a snapshot\n"), snapshot);
+			++errors;
+			continue;
+		}
+		snapname = delim + 1;
+		/* recursive mode walks down from the parent dataset */
+		if (recursive)
+			snapshot[delim - snapshot] = '\0';
+
+		cb.cb_recursive = recursive;
+		cb.cb_snapname = snapname;
+		cb.cb_nvlp = &nvl;
+
+		/*
+		 * 1. collect holds data, set format options
+		 *
+		 * Only the current argument is visited: cb_snapname
+		 * belongs to it alone, and the other arguments are handled
+		 * by their own iterations.
+		 */
+		ret = zfs_for_each(1, &argv[i], flags, types, NULL, NULL,
+		    limit, holds_callback, &cb);
+		if (ret != 0)
+			++errors;
+	}
+
+	/*
+	 * 2. print holds data
+	 */
+	print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl);
+
+	if (nvlist_empty(nvl))
+		(void) printf(gettext("no datasets available\n"));
+
+	nvlist_free(nvl);
+
+	return (0 != errors);
+}
#define CHECK_SPINNER 30
#define SPINNER_TIME 3 /* seconds */
@@ -2676,19 +5151,18 @@ typedef struct get_all_cbdata {
static int
get_one_dataset(zfs_handle_t *zhp, void *data)
{
- static char spin[] = { '-', '\\', '|', '/' };
+ static char *spin[] = { "-", "\\", "|", "/" };
static int spinval = 0;
static int spincheck = 0;
static time_t last_spin_time = (time_t)0;
- get_all_cbdata_t *cbp = data;
+ get_all_cb_t *cbp = data;
zfs_type_t type = zfs_get_type(zhp);
if (cbp->cb_verbose) {
if (--spincheck < 0) {
time_t now = time(NULL);
if (last_spin_time + SPINNER_TIME < now) {
- (void) printf("\b%c", spin[spinval++ % 4]);
- (void) fflush(stdout);
+ update_progress(spin[spinval++ % 4]);
last_spin_time = now;
}
spincheck = CHECK_SPINNER;
@@ -2698,8 +5172,7 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
/*
* Interate over any nested datasets.
*/
- if (type == ZFS_TYPE_FILESYSTEM &&
- zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
+ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
zfs_close(zhp);
return (1);
}
@@ -2707,83 +5180,32 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
/*
* Skip any datasets whose type does not match.
*/
- if ((type & cbp->cb_types) == 0) {
+ if ((type & ZFS_TYPE_FILESYSTEM) == 0) {
zfs_close(zhp);
return (0);
}
-
- if (cbp->cb_alloc == cbp->cb_used) {
- zfs_handle_t **handles;
-
- if (cbp->cb_alloc == 0)
- cbp->cb_alloc = 64;
- else
- cbp->cb_alloc *= 2;
-
- handles = safe_malloc(cbp->cb_alloc * sizeof (void *));
-
- if (cbp->cb_handles) {
- bcopy(cbp->cb_handles, handles,
- cbp->cb_used * sizeof (void *));
- free(cbp->cb_handles);
- }
-
- cbp->cb_handles = handles;
- }
-
- cbp->cb_handles[cbp->cb_used++] = zhp;
+ libzfs_add_handle(cbp, zhp);
+ assert(cbp->cb_used <= cbp->cb_alloc);
return (0);
}
static void
-get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count,
- boolean_t verbose)
+get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
{
- get_all_cbdata_t cb = { 0 };
- cb.cb_types = types;
+ get_all_cb_t cb = { 0 };
cb.cb_verbose = verbose;
+ cb.cb_getone = get_one_dataset;
- if (verbose) {
- (void) printf("%s: *", gettext("Reading ZFS config"));
- (void) fflush(stdout);
- }
-
+ if (verbose)
+ set_progress_header(gettext("Reading ZFS config"));
(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
*dslist = cb.cb_handles;
*count = cb.cb_used;
- if (verbose) {
- (void) printf("\b%s\n", gettext("done."));
- }
-}
-
-static int
-dataset_cmp(const void *a, const void *b)
-{
- zfs_handle_t **za = (zfs_handle_t **)a;
- zfs_handle_t **zb = (zfs_handle_t **)b;
- char mounta[MAXPATHLEN];
- char mountb[MAXPATHLEN];
- boolean_t gota, gotb;
-
- if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
- sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
- if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
- sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
-
- if (gota && gotb)
- return (strcmp(mounta, mountb));
-
- if (gota)
- return (-1);
- if (gotb)
- return (1);
-
- return (strcmp(zfs_get_name(a), zfs_get_name(b)));
+ if (verbose)
+ finish_progress(gettext("done."));
}
/*
@@ -2807,216 +5229,179 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
const char *cmdname = op == OP_SHARE ? "share" : "mount";
struct mnttab mnt;
uint64_t zoned, canmount;
- zfs_type_t type = zfs_get_type(zhp);
boolean_t shared_nfs, shared_smb;
- assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME));
+ assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM);
- if (type == ZFS_TYPE_FILESYSTEM) {
- /*
- * Check to make sure we can mount/share this dataset. If we
- * are in the global zone and the filesystem is exported to a
- * local zone, or if we are in a local zone and the
- * filesystem is not exported, then it is an error.
- */
- zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+ /*
+ * Check to make sure we can mount/share this dataset. If we
+ * are in the global zone and the filesystem is exported to a
+ * local zone, or if we are in a local zone and the
+ * filesystem is not exported, then it is an error.
+ */
+ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
- if (zoned && getzoneid() == GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
+ if (zoned && getzoneid() == GLOBAL_ZONEID) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "dataset is exported to a local zone\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "dataset is exported to a local zone\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
- } else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
- if (!explicit)
- return (0);
+ } else if (!zoned && getzoneid() != GLOBAL_ZONEID) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "permission denied\n"), cmdname,
- zfs_get_name(zhp));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "permission denied\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
+ }
- /*
- * Ignore any filesystems which don't apply to us. This
- * includes those with a legacy mountpoint, or those with
- * legacy share options.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
- sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
- sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
-
- if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
- strcmp(smbshareopts, "off") == 0) {
- if (!explicit)
- return (0);
+ /*
+ * Ignore any filesystems which don't apply to us. This
+ * includes those with a legacy mountpoint, or those with
+ * legacy share options.
+ */
+ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
+ sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
+ sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
+
+ if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
+ strcmp(smbshareopts, "off") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "legacy share\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use share(1M) to "
- "share this filesystem, or set "
- "sharenfs property on\n"));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot share '%s': "
+ "legacy share\n"), zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use share(1M) to "
+ "share this filesystem, or set "
+ "sharenfs property on\n"));
+ return (1);
+ }
- /*
- * We cannot share or mount legacy filesystems. If the
- * shareopts is non-legacy but the mountpoint is legacy, we
- * treat it as a legacy share.
- */
- if (strcmp(mountpoint, "legacy") == 0) {
- if (!explicit)
- return (0);
+ /*
+ * We cannot share or mount legacy filesystems. If the
+ * shareopts is non-legacy but the mountpoint is legacy, we
+ * treat it as a legacy share.
+ */
+ if (strcmp(mountpoint, "legacy") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use %s(1M) to "
- "%s this filesystem\n"), cmdname, cmdname);
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use %s(1M) to "
+ "%s this filesystem\n"), cmdname, cmdname);
+ return (1);
+ }
- if (strcmp(mountpoint, "none") == 0) {
- if (!explicit)
- return (0);
+ if (strcmp(mountpoint, "none") == 0) {
+ if (!explicit)
+ return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': no "
- "mountpoint set\n"), cmdname, zfs_get_name(zhp));
- return (1);
- }
+ (void) fprintf(stderr, gettext("cannot %s '%s': no "
+ "mountpoint set\n"), cmdname, zfs_get_name(zhp));
+ return (1);
+ }
- /*
- * canmount explicit outcome
- * on no pass through
- * on yes pass through
- * off no return 0
- * off yes display error, return 1
- * noauto no return 0
- * noauto yes pass through
- */
- canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
- if (canmount == ZFS_CANMOUNT_OFF) {
+ /*
+ * canmount explicit outcome
+ * on no pass through
+ * on yes pass through
+ * off no return 0
+ * off yes display error, return 1
+ * noauto no return 0
+ * noauto yes pass through
+ */
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
+ if (canmount == ZFS_CANMOUNT_OFF) {
+ if (!explicit)
+ return (0);
+
+ (void) fprintf(stderr, gettext("cannot %s '%s': "
+ "'canmount' property is set to 'off'\n"), cmdname,
+ zfs_get_name(zhp));
+ return (1);
+ } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
+ return (0);
+ }
+
+ /*
+ * At this point, we have verified that the mountpoint and/or
+ * shareopts are appropriate for auto management. If the
+ * filesystem is already mounted or shared, return (failing
+ * for explicit requests); otherwise mount or share the
+ * filesystem.
+ */
+ switch (op) {
+ case OP_SHARE:
+
+ shared_nfs = zfs_is_shared_nfs(zhp, NULL);
+ shared_smb = zfs_is_shared_smb(zhp, NULL);
+
+ if (shared_nfs && shared_smb ||
+ (shared_nfs && strcmp(shareopts, "on") == 0 &&
+ strcmp(smbshareopts, "off") == 0) ||
+ (shared_smb && strcmp(smbshareopts, "on") == 0 &&
+ strcmp(shareopts, "off") == 0)) {
if (!explicit)
return (0);
- (void) fprintf(stderr, gettext("cannot %s '%s': "
- "'canmount' property is set to 'off'\n"), cmdname,
+ (void) fprintf(stderr, gettext("cannot share "
+ "'%s': filesystem already shared\n"),
zfs_get_name(zhp));
return (1);
- } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
- return (0);
}
- /*
- * At this point, we have verified that the mountpoint and/or
- * shareopts are appropriate for auto management. If the
- * filesystem is already mounted or shared, return (failing
- * for explicit requests); otherwise mount or share the
- * filesystem.
- */
- switch (op) {
- case OP_SHARE:
-
- shared_nfs = zfs_is_shared_nfs(zhp, NULL);
- shared_smb = zfs_is_shared_smb(zhp, NULL);
-
- if (shared_nfs && shared_smb ||
- (shared_nfs && strcmp(shareopts, "on") == 0 &&
- strcmp(smbshareopts, "off") == 0) ||
- (shared_smb && strcmp(smbshareopts, "on") == 0 &&
- strcmp(shareopts, "off") == 0)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': filesystem already shared\n"),
- zfs_get_name(zhp));
- return (1);
- }
+ if (!zfs_is_mounted(zhp, NULL) &&
+ zfs_mount(zhp, NULL, 0) != 0)
+ return (1);
- if (!zfs_is_mounted(zhp, NULL) &&
- zfs_mount(zhp, NULL, 0) != 0)
+ if (protocol == NULL) {
+ if (zfs_shareall(zhp) != 0)
return (1);
-
- if (protocol == NULL) {
- if (zfs_shareall(zhp) != 0)
- return (1);
- } else if (strcmp(protocol, "nfs") == 0) {
- if (zfs_share_nfs(zhp))
- return (1);
- } else if (strcmp(protocol, "smb") == 0) {
- if (zfs_share_smb(zhp))
- return (1);
- } else {
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': invalid share type '%s' "
- "specified\n"),
- zfs_get_name(zhp), protocol);
+ } else if (strcmp(protocol, "nfs") == 0) {
+ if (zfs_share_nfs(zhp))
return (1);
- }
-
- break;
-
- case OP_MOUNT:
- if (options == NULL)
- mnt.mnt_mntopts = "";
- else
- mnt.mnt_mntopts = (char *)options;
-
- if (!hasmntopt(&mnt, MNTOPT_REMOUNT) &&
- zfs_is_mounted(zhp, NULL)) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot mount "
- "'%s': filesystem already mounted\n"),
- zfs_get_name(zhp));
- return (1);
- }
-
- if (zfs_mount(zhp, options, flags) != 0)
+ } else if (strcmp(protocol, "smb") == 0) {
+ if (zfs_share_smb(zhp))
return (1);
- break;
+ } else {
+ (void) fprintf(stderr, gettext("cannot share "
+ "'%s': invalid share type '%s' "
+ "specified\n"),
+ zfs_get_name(zhp), protocol);
+ return (1);
}
- } else {
- assert(op == OP_SHARE);
- /*
- * Ignore any volumes that aren't shared.
- */
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+ break;
- if (strcmp(shareopts, "off") == 0) {
- if (!explicit)
- return (0);
-
- (void) fprintf(stderr, gettext("cannot share '%s': "
- "'shareiscsi' property not set\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set 'shareiscsi' "
- "property or use iscsitadm(1M) to share this "
- "volume\n"));
- return (1);
- }
+ case OP_MOUNT:
+ if (options == NULL)
+ mnt.mnt_mntopts = "";
+ else
+ mnt.mnt_mntopts = (char *)options;
- if (zfs_is_shared_iscsi(zhp)) {
+ if (!hasmntopt(&mnt, MNTOPT_REMOUNT) &&
+ zfs_is_mounted(zhp, NULL)) {
if (!explicit)
return (0);
- (void) fprintf(stderr, gettext("cannot share "
- "'%s': volume already shared\n"),
+ (void) fprintf(stderr, gettext("cannot mount "
+ "'%s': filesystem already mounted\n"),
zfs_get_name(zhp));
return (1);
}
- if (zfs_share_iscsi(zhp) != 0)
+ if (zfs_mount(zhp, options, flags) != 0)
return (1);
+ break;
}
return (0);
@@ -3028,19 +5413,16 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
static void
report_mount_progress(int current, int total)
{
- static int len;
- static char *reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
- "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
- static time_t last_progress_time;
+ static time_t last_progress_time = 0;
time_t now = time(NULL);
+ char info[32];
/* report 1..n instead of 0..n-1 */
++current;
/* display header if we're here for the first time */
if (current == 1) {
- (void) printf(gettext("Mounting ZFS filesystems: "));
- len = 0;
+ set_progress_header(gettext("Mounting ZFS filesystems"));
} else if (current != total && last_progress_time + MOUNT_TIME >= now) {
/* too soon to report again */
return;
@@ -3048,13 +5430,12 @@ report_mount_progress(int current, int total)
last_progress_time = now;
- /* back up to prepare for overwriting */
- if (len)
- (void) printf("%*.*s", len, len, reverse);
+ (void) sprintf(info, "(%d/%d)", current, total);
- /* We put a newline at the end if this is the last one. */
- len = printf("(%d/%d)%s", current, total, current == total ? "\n" : "");
- (void) fflush(stdout);
+ if (current == total)
+ finish_progress(info);
+ else
+ update_progress(info);
}
static void
@@ -3083,7 +5464,7 @@ share_mount(int op, int argc, char **argv)
boolean_t verbose = B_FALSE;
int c, ret = 0;
char *options = NULL;
- int types, flags = 0;
+ int flags = 0;
/* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a"))
@@ -3133,24 +5514,16 @@ share_mount(int op, int argc, char **argv)
size_t i, count = 0;
char *protocol = NULL;
- if (op == OP_MOUNT) {
- types = ZFS_TYPE_FILESYSTEM;
- } else if (argc > 0) {
- if (strcmp(argv[0], "nfs") == 0 ||
- strcmp(argv[0], "smb") == 0) {
- types = ZFS_TYPE_FILESYSTEM;
- } else if (strcmp(argv[0], "iscsi") == 0) {
- types = ZFS_TYPE_VOLUME;
- } else {
+ if (op == OP_SHARE && argc > 0) {
+ if (strcmp(argv[0], "nfs") != 0 &&
+ strcmp(argv[0], "smb") != 0) {
(void) fprintf(stderr, gettext("share type "
- "must be 'nfs', 'smb' or 'iscsi'\n"));
+ "must be 'nfs' or 'smb'\n"));
usage(B_FALSE);
}
protocol = argv[0];
argc--;
argv++;
- } else {
- types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
}
if (argc != 0) {
@@ -3158,12 +5531,13 @@ share_mount(int op, int argc, char **argv)
usage(B_FALSE);
}
- get_all_datasets(types, &dslist, &count, verbose);
+ start_progress_timer();
+ get_all_datasets(&dslist, &count, verbose);
if (count == 0)
return (0);
- qsort(dslist, count, sizeof (void *), dataset_cmp);
+ qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);
for (i = 0; i < count; i++) {
if (verbose)
@@ -3177,8 +5551,7 @@ share_mount(int op, int argc, char **argv)
free(dslist);
} else if (argc == 0) {
- struct statfs *sfs;
- int i, n;
+ struct mnttab entry;
if ((op == OP_SHARE) || (options != NULL)) {
(void) fprintf(stderr, gettext("missing filesystem "
@@ -3191,33 +5564,27 @@ share_mount(int op, int argc, char **argv)
* display any active ZFS mounts. We hide any snapshots, since
* they are controlled automatically.
*/
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
- return (0);
- }
- for (i = 0; i < n; i++) {
- if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0 ||
- strchr(sfs[i].f_mntfromname, '@') != NULL)
+ rewind(mnttab_file);
+ while (getmntent(mnttab_file, &entry) == 0) {
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 ||
+ strchr(entry.mnt_special, '@') != NULL)
continue;
- (void) printf("%-30s %s\n", sfs[i].f_mntfromname,
- sfs[i].f_mntonname);
+ (void) printf("%-30s %s\n", entry.mnt_special,
+ entry.mnt_mountp);
}
} else {
zfs_handle_t *zhp;
- types = ZFS_TYPE_FILESYSTEM;
- if (op == OP_SHARE)
- types |= ZFS_TYPE_VOLUME;
-
if (argc > 1) {
(void) fprintf(stderr,
gettext("too many arguments\n"));
usage(B_FALSE);
}
- if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) {
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL) {
ret = 1;
} else {
ret = share_mount_one(zhp, op, flags, NULL, B_TRUE,
@@ -3230,7 +5597,7 @@ share_mount(int op, int argc, char **argv)
}
/*
- * zfs mount -a [nfs | iscsi]
+ * zfs mount -a
* zfs mount filesystem
*
* Mount all filesystems, or mount the given filesystem.
@@ -3242,7 +5609,7 @@ zfs_do_mount(int argc, char **argv)
}
/*
- * zfs share -a [nfs | iscsi | smb]
+ * zfs share -a [nfs | smb]
* zfs share filesystem
*
* Share all filesystems, or share the given filesystem.
@@ -3280,7 +5647,7 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
zfs_handle_t *zhp;
int ret;
struct stat64 statbuf;
- struct mnttab search = { 0 }, entry;
+ struct extmnttab entry;
const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
ino_t path_inode;
@@ -3300,9 +5667,26 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
/*
* Search for the given (major,minor) pair in the mount table.
*/
- search.mnt_mountp = path;
+#ifdef sun
rewind(mnttab_file);
- if (getmntany(mnttab_file, &entry, &search) != 0) {
+ while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) {
+ if (entry.mnt_major == major(statbuf.st_dev) &&
+ entry.mnt_minor == minor(statbuf.st_dev))
+ break;
+ }
+#else
+	{
+		struct statfs sfs;
+
+		/*
+		 * Non-Solaris build: instead of walking the mount table,
+		 * ask statfs() about 'path' directly.  'ret' has to be
+		 * assigned on both outcomes because the check right after
+		 * the #endif reads it, and 'sfs' is converted only when
+		 * statfs() actually filled it in.
+		 */
+		if (statfs(path, &sfs) != 0) {
+			(void) fprintf(stderr, "%s: %s\n", path,
+			    strerror(errno));
+			ret = -1;
+		} else {
+			statfs2mnttab(&sfs, &entry);
+			ret = 0;
+		}
+	}
+#endif
+ if (ret != 0) {
if (op == OP_SHARE) {
(void) fprintf(stderr, gettext("cannot %s '%s': not "
"currently mounted\n"), cmdname, path);
@@ -3392,9 +5776,9 @@ unshare_unmount(int op, int argc, char **argv)
int do_all = 0;
int flags = 0;
int ret = 0;
- int types, c;
+ int c;
zfs_handle_t *zhp;
- char nfsiscsi_mnt_prop[ZFS_MAXPROPLEN];
+ char nfs_mnt_prop[ZFS_MAXPROPLEN];
char sharesmb[ZFS_MAXPROPLEN];
/* check options */
@@ -3431,51 +5815,37 @@ unshare_unmount(int op, int argc, char **argv)
* the special type (dataset name), and walk the result in
* reverse to make sure to get any snapshots first.
*/
+ struct mnttab entry;
uu_avl_pool_t *pool;
uu_avl_t *tree;
unshare_unmount_node_t *node;
uu_avl_index_t idx;
uu_avl_walk_t *walk;
- struct statfs *sfs;
- int i, n;
if (argc != 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
- if ((pool = uu_avl_pool_create("unmount_pool",
+ if (((pool = uu_avl_pool_create("unmount_pool",
sizeof (unshare_unmount_node_t),
offsetof(unshare_unmount_node_t, un_avlnode),
- unshare_unmount_compare,
- UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
-
- if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) {
- (void) fprintf(stderr, gettext("internal error: "
- "out of memory\n"));
- exit(1);
- }
+ unshare_unmount_compare, UU_DEFAULT)) == NULL) ||
+ ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL))
+ nomem();
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- (void) fprintf(stderr, gettext("internal error: "
- "getmntinfo() failed\n"));
- exit(1);
- }
- for (i = 0; i < n; i++) {
+ rewind(mnttab_file);
+ while (getmntent(mnttab_file, &entry) == 0) {
/* ignore non-ZFS entries */
- if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0)
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
continue;
/* ignore snapshots */
- if (strchr(sfs[i].f_mntfromname, '@') != NULL)
+ if (strchr(entry.mnt_special, '@') != NULL)
continue;
- if ((zhp = zfs_open(g_zfs, sfs[i].f_mntfromname,
+ if ((zhp = zfs_open(g_zfs, entry.mnt_special,
ZFS_TYPE_FILESYSTEM)) == NULL) {
ret = 1;
continue;
@@ -3484,25 +5854,25 @@ unshare_unmount(int op, int argc, char **argv)
switch (op) {
case OP_SHARE:
verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") != 0)
+ if (strcmp(nfs_mnt_prop, "off") != 0)
break;
verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0)
+ if (strcmp(nfs_mnt_prop, "off") == 0)
continue;
break;
case OP_MOUNT:
/* Ignore legacy mounts */
verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0)
+ if (strcmp(nfs_mnt_prop, "legacy") == 0)
continue;
/* Ignore canmount=noauto mounts */
if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) ==
@@ -3514,13 +5884,7 @@ unshare_unmount(int op, int argc, char **argv)
node = safe_malloc(sizeof (unshare_unmount_node_t));
node->un_zhp = zhp;
-
- if ((node->un_mountp = strdup(sfs[i].f_mntonname)) ==
- NULL) {
- (void) fprintf(stderr, gettext("internal error:"
- " out of memory\n"));
- exit(1);
- }
+ node->un_mountp = safe_strdup(entry.mnt_mountp);
uu_avl_node_init(node, &node->un_avlnode, pool);
@@ -3538,11 +5902,8 @@ unshare_unmount(int op, int argc, char **argv)
* removing it from the AVL tree in the process.
*/
if ((walk = uu_avl_walk_start(tree,
- UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) {
- (void) fprintf(stderr,
- gettext("internal error: out of memory"));
- exit(1);
- }
+ UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL)
+ nomem();
while ((node = uu_avl_walk_next(walk)) != NULL) {
uu_avl_remove(tree, node);
@@ -3570,29 +5931,6 @@ unshare_unmount(int op, int argc, char **argv)
uu_avl_destroy(tree);
uu_avl_pool_destroy(pool);
- if (op == OP_SHARE) {
- /*
- * Finally, unshare any volumes shared via iSCSI.
- */
- zfs_handle_t **dslist = NULL;
- size_t i, count = 0;
-
- get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count,
- B_FALSE);
-
- if (count != 0) {
- qsort(dslist, count, sizeof (void *),
- dataset_cmp);
-
- for (i = 0; i < count; i++) {
- if (zfs_unshare_iscsi(dslist[i]) != 0)
- ret = 1;
- zfs_close(dslist[i]);
- }
-
- free(dslist);
- }
- }
} else {
if (argc != 1) {
if (argc == 0)
@@ -3614,91 +5952,63 @@ unshare_unmount(int op, int argc, char **argv)
return (unshare_unmount_path(op, argv[0],
flags, B_FALSE));
- types = ZFS_TYPE_FILESYSTEM;
- if (op == OP_SHARE)
- types |= ZFS_TYPE_VOLUME;
-
- if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0],
+ ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
- if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
- nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop), NULL,
- NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, op == OP_SHARE ?
+ ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
+ nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL,
+ NULL, 0, B_FALSE) == 0);
- switch (op) {
- case OP_SHARE:
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
- nfsiscsi_mnt_prop,
- sizeof (nfsiscsi_mnt_prop),
- NULL, NULL, 0, B_FALSE) == 0);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
- sharesmb, sizeof (sharesmb), NULL, NULL,
- 0, B_FALSE) == 0);
-
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0 &&
- strcmp(sharesmb, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': legacy share\n"),
- zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "unshare(1M) to unshare this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_shared(zhp)) {
- (void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently "
- "shared\n"), zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unshareall(zhp) != 0) {
- ret = 1;
- }
- break;
-
- case OP_MOUNT:
- if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': legacy "
- "mountpoint\n"), zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use "
- "umount(1M) to unmount this "
- "filesystem\n"));
- ret = 1;
- } else if (!zfs_is_mounted(zhp, NULL)) {
- (void) fprintf(stderr, gettext("cannot "
- "unmount '%s': not currently "
- "mounted\n"),
- zfs_get_name(zhp));
- ret = 1;
- } else if (zfs_unmountall(zhp, flags) != 0) {
- ret = 1;
- }
- break;
- }
- } else {
- assert(op == OP_SHARE);
-
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI,
- nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop),
+ switch (op) {
+ case OP_SHARE:
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
+ nfs_mnt_prop,
+ sizeof (nfs_mnt_prop),
NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
+ sharesmb, sizeof (sharesmb), NULL, NULL,
+ 0, B_FALSE) == 0);
- if (strcmp(nfsiscsi_mnt_prop, "off") == 0) {
- (void) fprintf(stderr, gettext("cannot unshare "
- "'%s': 'shareiscsi' property not set\n"),
+ if (strcmp(nfs_mnt_prop, "off") == 0 &&
+ strcmp(sharesmb, "off") == 0) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unshare '%s': legacy share\n"),
zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("set "
- "'shareiscsi' property or use "
- "iscsitadm(1M) to share this volume\n"));
+ (void) fprintf(stderr, gettext("use "
+ "unshare(1M) to unshare this "
+ "filesystem\n"));
ret = 1;
- } else if (!zfs_is_shared_iscsi(zhp)) {
+ } else if (!zfs_is_shared(zhp)) {
(void) fprintf(stderr, gettext("cannot "
- "unshare '%s': not currently shared\n"),
+ "unshare '%s': not currently "
+ "shared\n"), zfs_get_name(zhp));
+ ret = 1;
+ } else if (zfs_unshareall(zhp) != 0) {
+ ret = 1;
+ }
+ break;
+
+ case OP_MOUNT:
+ if (strcmp(nfs_mnt_prop, "legacy") == 0) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unmount '%s': legacy "
+ "mountpoint\n"), zfs_get_name(zhp));
+ (void) fprintf(stderr, gettext("use "
+ "umount(1M) to unmount this "
+ "filesystem\n"));
+ ret = 1;
+ } else if (!zfs_is_mounted(zhp, NULL)) {
+ (void) fprintf(stderr, gettext("cannot "
+ "unmount '%s': not currently "
+ "mounted\n"),
zfs_get_name(zhp));
ret = 1;
- } else if (zfs_unshare_iscsi(zhp) != 0) {
+ } else if (zfs_unmountall(zhp, flags) != 0) {
ret = 1;
}
+ break;
}
zfs_close(zhp);
@@ -3793,16 +6103,6 @@ zfs_do_unjail(int argc, char **argv)
return (do_jail(argc, argv, 0));
}
-/* ARGSUSED */
-static int
-zfs_do_python(int argc, char **argv)
-{
- (void) execv(pypath, argv-1);
- (void) fprintf(stderr, "internal error: %s not found\n", pypath);
- (void) fprintf(stderr, " install sysutils/py-zfs port to correct this\n");
- return (-1);
-}
-
/*
* Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
* 'legacy'. Otherwise, complain that use should be using 'zfs mount'.
@@ -3825,14 +6125,10 @@ manual_mount(int argc, char **argv)
(void) strlcpy(mntopts, optarg, sizeof (mntopts));
break;
case 'O':
-#if 0 /* FreeBSD: No support for MS_OVERLAY. */
flags |= MS_OVERLAY;
-#endif
break;
case 'm':
-#if 0 /* FreeBSD: No support for MS_NOMNTTAB. */
flags |= MS_NOMNTTAB;
-#endif
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -3943,27 +6239,6 @@ manual_unmount(int argc, char **argv)
}
static int
-volcheck(zpool_handle_t *zhp, void *data)
-{
- boolean_t isinit = *((boolean_t *)data);
-
- if (isinit)
- return (zpool_create_zvol_links(zhp));
- else
- return (zpool_remove_zvol_links(zhp));
-}
-
-/*
- * Iterate over all pools in the system and either create or destroy /dev/zvol
- * links, depending on the value of 'isinit'.
- */
-static int
-do_volcheck(boolean_t isinit)
-{
- return (zpool_iter(g_zfs, volcheck, &isinit) ? 1 : 0);
-}
-
-static int
find_command_idx(char *command, int *idx)
{
int i;
@@ -3980,6 +6255,81 @@ find_command_idx(char *command, int *idx)
return (1);
}
+/*
+ * Implements the 'diff' subcommand.
+ *
+ * Accepts the flags -F, -H and -t (mapped to ZFS_DIFF_CLASSIFY,
+ * ZFS_DIFF_PARSEABLE and ZFS_DIFF_TIMESTAMP) followed by one or two
+ * operands.  The first operand is handed on as 'fromsnap' and the
+ * optional second one as 'tosnap'.  The filesystem that gets opened is
+ * named by the first operand with everything from '@' onwards cut off;
+ * when the first operand itself starts with '@', the name is taken from
+ * the second operand instead.
+ *
+ * Returns 0 if zfs_show_diffs() returned 0, and 1 otherwise (including
+ * when the filesystem cannot be opened).  Malformed command lines are
+ * routed through usage().
+ */
+static int
+zfs_do_diff(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	int flags = 0;
+	char *tosnap = NULL;
+	char *fromsnap = NULL;
+	char *atp, *copy;
+	int err;
+	int c;
+
+	while ((c = getopt(argc, argv, "FHt")) != -1) {
+		switch (c) {
+		case 'F':
+			flags |= ZFS_DIFF_CLASSIFY;
+			break;
+		case 'H':
+			flags |= ZFS_DIFF_PARSEABLE;
+			break;
+		case 't':
+			flags |= ZFS_DIFF_TIMESTAMP;
+			break;
+		default:
+			(void) fprintf(stderr,
+			    gettext("invalid option '%c'\n"), optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr,
+		    gettext("must provide at least one snapshot name\n"));
+		usage(B_FALSE);
+	}
+
+	if (argc > 2) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	fromsnap = argv[0];
+	tosnap = (argc == 2) ? argv[1] : NULL;
+
+	/*
+	 * Work on a private copy of whichever operand carries the dataset
+	 * name, so that argv is left untouched when the '@' is cut off.
+	 */
+	copy = NULL;
+	if (*fromsnap != '@')
+		copy = strdup(fromsnap);
+	else if (tosnap)
+		copy = strdup(tosnap);
+	if (copy == NULL)
+		usage(B_FALSE);
+
+	if ((atp = strchr(copy, '@')) != NULL)
+		*atp = '\0';
+
+	/*
+	 * The copied name is needed only for the open itself, so release
+	 * it before looking at the result; that way the failure path does
+	 * not leak it.
+	 */
+	zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM);
+	free(copy);
+	if (zhp == NULL)
+		return (1);
+
+	/*
+	 * Ignore SIGPIPE so that the library can give us
+	 * information on any failure
+	 */
+	(void) sigignore(SIGPIPE);
+
+	err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags);
+
+	zfs_close(zhp);
+
+	return (err != 0);
+}
+
int
main(int argc, char **argv)
{
@@ -4049,15 +6399,6 @@ main(int argc, char **argv)
usage(B_TRUE);
/*
- * 'volinit' and 'volfini' do not appear in the usage message,
- * so we have to special case them here.
- */
- if (strcmp(cmdname, "volinit") == 0)
- return (do_volcheck(B_TRUE));
- else if (strcmp(cmdname, "volfini") == 0)
- return (do_volcheck(B_FALSE));
-
- /*
* Run the appropriate command.
*/
libzfs_mnttab_cache(g_zfs, B_TRUE);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
index c7f2f16..3ddff9e 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_util.h
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZFS_UTIL_H
#define _ZFS_UTIL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <libzfs.h>
#ifdef __cplusplus
@@ -35,6 +32,7 @@ extern "C" {
#endif
void * safe_malloc(size_t size);
+void nomem(void);
libzfs_handle_t *g_zfs;
#ifdef __cplusplus
diff --git a/cddl/contrib/opensolaris/cmd/zinject/translate.c b/cddl/contrib/opensolaris/cmd/zinject/translate.c
index da26cd6..442f220 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/translate.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/translate.c
@@ -19,14 +19,11 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libzfs.h>
-#undef verify /* both libzfs.h and zfs_context.h want to define this */
-
#include <sys/zfs_context.h>
#include <errno.h>
@@ -49,9 +46,6 @@
#include "zinject.h"
-#include <assert.h>
-#define verify assert
-
extern void kernel_init(int);
extern void kernel_fini(void);
@@ -70,6 +64,18 @@ ziprintf(const char *fmt, ...)
va_end(ap);
}
+/*
+ * Copy 'src' into 'dest', writing a single '/' for every run of
+ * consecutive '/' characters; all other characters are copied as they
+ * are.  'dest' is always NUL-terminated.  No buffer length is passed in,
+ * so the caller has to supply room for the result, which is never longer
+ * than 'src'.
+ */
+static void
+compress_slashes(const char *src, char *dest)
+{
+	const char *in = src;
+	char *out = dest;
+	char ch;
+
+	for (ch = *in; ch != '\0'; ch = *in) {
+		in++;
+		*out++ = ch;
+		if (ch != '/')
+			continue;
+		/* Swallow whatever is left of this run of slashes. */
+		while (*in == '/')
+			in++;
+	}
+	*out = '\0';
+}
+
/*
* Given a full path to a file, translate into a dataset name and a relative
* path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
@@ -77,11 +83,14 @@ ziprintf(const char *fmt, ...)
* buffer, which we need later to get the object ID.
*/
static int
-parse_pathname(const char *fullpath, char *dataset, char *relpath,
+parse_pathname(const char *inpath, char *dataset, char *relpath,
struct stat64 *statbuf)
{
struct statfs sfs;
const char *rel;
+ char fullpath[MAXPATHLEN];
+
+ compress_slashes(inpath, fullpath);
if (fullpath[0] != '/') {
(void) fprintf(stderr, "invalid object '%s': must be full "
@@ -148,8 +157,8 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/
sync();
- if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
- DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
+ if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
return (-1);
@@ -158,7 +167,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino;
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
return (0);
}
@@ -233,17 +242,17 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* Get the dnode associated with object, so we can calculate the block
* size.
*/
- if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
+ B_TRUE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
goto out;
}
if (record->zi_object == 0) {
- dn = os->os->os_meta_dnode;
+ dn = DMU_META_DNODE(os);
} else {
- err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+ err = dnode_hold(os, record->zi_object, FTAG, &dn);
if (err != 0) {
(void) fprintf(stderr, "failed to hold dnode "
"for object %llu\n",
@@ -292,11 +301,11 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
ret = 0;
out:
if (dn) {
- if (dn != os->os->os_meta_dnode)
+ if (dn != DMU_META_DNODE(os))
dnode_rele(dn, FTAG);
}
if (os)
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
return (ret);
}
@@ -333,8 +342,8 @@ translate_record(err_type_t type, const char *object, const char *range,
case TYPE_CONFIG:
record->zi_type = DMU_OT_PACKED_NVLIST;
break;
- case TYPE_BPLIST:
- record->zi_type = DMU_OT_BPLIST;
+ case TYPE_BPOBJ:
+ record->zi_type = DMU_OT_BPOBJ;
break;
case TYPE_SPACEMAP:
record->zi_type = DMU_OT_SPACE_MAP;
@@ -455,6 +464,14 @@ translate_device(const char *pool, const char *device, err_type_t label_type,
record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
break;
+ case TYPE_LABEL_PAD1:
+ record->zi_start = offsetof(vdev_label_t, vl_pad1);
+ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+ break;
+ case TYPE_LABEL_PAD2:
+ record->zi_start = offsetof(vdev_label_t, vl_pad2);
+ record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+ break;
}
return (0);
}
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
index e8327e8..51d2fc9 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -42,12 +41,12 @@
* any attempt to read from the device will return EIO, but any attempt to
* reopen the device will also return ENXIO.
* For label faults, the -L option must be specified. This allows faults
- * to be injected into either the nvlist or uberblock region of all the labels
- * for the specified device.
+ * to be injected into either the nvlist, uberblock, pad1, or pad2 region
+ * of all the labels for the specified device.
*
* This form of the command looks like:
*
- * zinject -d device [-e errno] [-L <uber | nvlist>] pool
+ * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
*
*
* DATA FAULTS
@@ -70,7 +69,7 @@
* mos Any data in the MOS
* mosdir object directory
* config pool configuration
- * bplist blkptr list
+ * bpobj blkptr list
* spacemap spacemap
* metaslab metaslab
* errlog persistent error log
@@ -167,11 +166,13 @@ static const char *errtable[TYPE_INVAL] = {
"mosdir",
"metaslab",
"config",
- "bplist",
+ "bpobj",
"spacemap",
"errlog",
"uber",
- "nvlist"
+ "nvlist",
+ "pad1",
+ "pad2"
};
static err_type_t
@@ -195,8 +196,8 @@ type_to_name(uint64_t type)
return ("metaslab");
case DMU_OT_PACKED_NVLIST:
return ("config");
- case DMU_OT_BPLIST:
- return ("bplist");
+ case DMU_OT_BPOBJ:
+ return ("bpobj");
case DMU_OT_SPACE_MAP:
return ("spacemap");
case DMU_OT_ERROR_LOG:
@@ -225,10 +226,27 @@ usage(void)
"\t\tClear the particular record (if given a numeric ID), or\n"
"\t\tall records if 'all' is specificed.\n"
"\n"
- "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
+ "\tzinject -p <function name> pool\n"
+ "\t\tInject a panic fault at the specified function. Only \n"
+ "\t\tfunctions which call spa_vdev_config_exit(), or \n"
+ "\t\tspa_vdev_exit() will trigger a panic.\n"
+ "\n"
+ "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
+ "\t [-T <read|write|free|claim|all> pool\n"
"\t\tInject a fault into a particular device or the device's\n"
- "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
- "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
+ "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
+ "\t\t'pad1', or 'pad2'.\n"
+ "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
+ "\n"
+ "\tzinject -d device -A <degrade|fault> pool\n"
+ "\t\tPerform a specific action on a particular device\n"
+ "\n"
+ "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
+ "\t\tCause the pool to stop writing blocks yet not\n"
+ "\t\treport errors for a duration. Simulates buggy hardware\n"
+ "\t\tthat fails to honor cache flush requests.\n"
+ "\t\tDefault duration is 30 seconds. The machine is panicked\n"
+ "\t\tat the end of the duration.\n"
"\n"
"\tzinject -b objset:object:level:blkid pool\n"
"\n"
@@ -270,7 +288,7 @@ usage(void)
"\t\t\ton a ZFS filesystem.\n"
"\n"
"\t-t <mos>\tInject errors into the MOS for objects of the given\n"
- "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
+ "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
"\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
"\t\t\tthe poolname.\n");
}
@@ -289,6 +307,12 @@ iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
&zc.zc_inject_record, data)) != 0)
return (ret);
+ if (errno != ENOENT) {
+ (void) fprintf(stderr, "Unable to list handlers: %s\n",
+ strerror(errno));
+ return (-1);
+ }
+
return (0);
}
@@ -298,7 +322,7 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
- if (record->zi_guid != 0)
+ if (record->zi_guid != 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@@ -330,7 +354,7 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
- if (record->zi_guid == 0)
+ if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@@ -346,6 +370,27 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0);
}
+/*
+ * iter_handlers() callback that lists panic-injection handlers.
+ *
+ * A record is treated as a panic handler when its zi_func name is
+ * non-empty; every other record is skipped.  'data' points to an int
+ * counting the rows printed so far: the column header is emitted just
+ * before the first row, and the counter is bumped once per row.
+ * Always returns 0.
+ */
+static int
+print_panic_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *nprinted = data;
+
+	if (record->zi_func[0] != '\0') {
+		/* Post-increment: only the very first row sees a zero. */
+		if ((*nprinted)++ == 0) {
+			(void) printf("%3s %-15s %s\n", "ID", "POOL",
+			    "FUNCTION");
+			(void) printf("--- --------------- ----------------\n");
+		}
+
+		(void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
+	}
+
+	return (0);
+}
+
/*
* Print all registered error handlers. Returns the number of handlers
* registered.
@@ -353,14 +398,25 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
static int
print_all_handlers(void)
{
- int count = 0;
+ int count = 0, total = 0;
(void) iter_handlers(print_device_handler, &count);
- (void) printf("\n");
- count = 0;
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
(void) iter_handlers(print_data_handler, &count);
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
+ (void) iter_handlers(print_panic_handler, &count);
- return (count);
+ return (count + total);
}
/* ARGSUSED */
@@ -389,7 +445,8 @@ cancel_all_handlers(void)
{
int ret = iter_handlers(cancel_one_handler, NULL);
- (void) printf("removed all registered handlers\n");
+ if (ret == 0)
+ (void) printf("removed all registered handlers\n");
return (ret);
}
@@ -446,6 +503,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
if (record->zi_guid) {
(void) printf(" vdev: %llx\n",
(u_longlong_t)record->zi_guid);
+ } else if (record->zi_func[0] != '\0') {
+ (void) printf(" panic function: %s\n",
+ record->zi_func);
+ } else if (record->zi_duration > 0) {
+ (void) printf(" time: %lld seconds\n",
+ (u_longlong_t)record->zi_duration);
+ } else if (record->zi_duration < 0) {
+ (void) printf(" txgs: %lld \n",
+ (u_longlong_t)-record->zi_duration);
} else {
(void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset);
@@ -468,6 +534,22 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
}
+/*
+ * Ask the kernel to change the state of a single vdev.
+ *
+ * pool	name of the pool, copied into zc_name
+ * record	supplies the target vdev through its zi_guid
+ * cmd		VDEV_STATE_DEGRADED or VDEV_STATE_FAULTED, passed in zc_cookie
+ *
+ * Returns 0 if the ZFS_IOC_VDEV_SET_STATE ioctl succeeds, 1 otherwise.
+ */
int
+perform_action(const char *pool, zinject_record_t *record, int cmd)
+{
+	/*
+	 * Zero the whole command first: only three fields are filled in
+	 * below, and the rest must not reach the ioctl as stack garbage.
+	 */
+	zfs_cmd_t zc = { 0 };
+
+	ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
+	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
+	zc.zc_guid = record->zi_guid;
+	zc.zc_cookie = cmd;
+
+	if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+		return (0);
+
+	return (1);
+}
+
+int
main(int argc, char **argv)
{
int c;
@@ -480,12 +562,17 @@ main(int argc, char **argv)
int quiet = 0;
int error = 0;
int domount = 0;
+ int io_type = ZIO_TYPES;
+ int action = VDEV_STATE_UNKNOWN;
err_type_t type = TYPE_INVAL;
err_type_t label = TYPE_INVAL;
zinject_record_t record = { 0 };
char pool[MAXNAMELEN];
char dataset[MAXNAMELEN];
zfs_handle_t *zhp;
+ int nowrites = 0;
+ int dur_txg = 0;
+ int dur_secs = 0;
int ret;
int flags = 0;
@@ -517,11 +604,24 @@ main(int argc, char **argv)
return (0);
}
- while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
+ while ((c = getopt(argc, argv,
+ ":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
break;
+ case 'A':
+ if (strcasecmp(optarg, "degrade") == 0) {
+ action = VDEV_STATE_DEGRADED;
+ } else if (strcasecmp(optarg, "fault") == 0) {
+ action = VDEV_STATE_FAULTED;
+ } else {
+ (void) fprintf(stderr, "invalid action '%s': "
+ "must be 'degrade' or 'fault'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
case 'b':
raw = optarg;
break;
@@ -538,6 +638,8 @@ main(int argc, char **argv)
error = ECKSUM;
} else if (strcasecmp(optarg, "nxio") == 0) {
error = ENXIO;
+ } else if (strcasecmp(optarg, "dtl") == 0) {
+ error = ECHILD;
} else {
(void) fprintf(stderr, "invalid error type "
"'%s': must be 'io', 'checksum' or "
@@ -557,9 +659,27 @@ main(int argc, char **argv)
case 'F':
record.zi_failfast = B_TRUE;
break;
+ case 'g':
+ dur_txg = 1;
+ record.zi_duration = (int)strtol(optarg, &end, 10);
+ if (record.zi_duration <= 0 || *end != '\0') {
+ (void) fprintf(stderr, "invalid duration '%s': "
+ "must be a positive integer\n", optarg);
+ usage();
+ return (1);
+ }
+ /* store duration of txgs as its negative */
+ record.zi_duration *= -1;
+ break;
case 'h':
usage();
return (0);
+ case 'I':
+ /* default duration, if one hasn't yet been defined */
+ nowrites = 1;
+ if (dur_secs == 0 && dur_txg == 0)
+ record.zi_duration = 30;
+ break;
case 'l':
level = (int)strtol(optarg, &end, 10);
if (*end != '\0') {
@@ -572,12 +692,45 @@ main(int argc, char **argv)
case 'm':
domount = 1;
break;
+ case 'p':
+ (void) strlcpy(record.zi_func, optarg,
+ sizeof (record.zi_func));
+ break;
case 'q':
quiet = 1;
break;
case 'r':
range = optarg;
break;
+ case 's':
+ dur_secs = 1;
+ record.zi_duration = (int)strtol(optarg, &end, 10);
+ if (record.zi_duration <= 0 || *end != '\0') {
+ (void) fprintf(stderr, "invalid duration '%s': "
+ "must be a positive integer\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case 'T':
+ if (strcasecmp(optarg, "read") == 0) {
+ io_type = ZIO_TYPE_READ;
+ } else if (strcasecmp(optarg, "write") == 0) {
+ io_type = ZIO_TYPE_WRITE;
+ } else if (strcasecmp(optarg, "free") == 0) {
+ io_type = ZIO_TYPE_FREE;
+ } else if (strcasecmp(optarg, "claim") == 0) {
+ io_type = ZIO_TYPE_CLAIM;
+ } else if (strcasecmp(optarg, "all") == 0) {
+ io_type = ZIO_TYPES;
+ } else {
+ (void) fprintf(stderr, "invalid I/O type "
+ "'%s': must be 'read', 'write', 'free', "
+ "'claim' or 'all'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
case 't':
if ((type = name_to_type(optarg)) == TYPE_INVAL &&
!MOS_TYPE(type)) {
@@ -620,7 +773,8 @@ main(int argc, char **argv)
* '-c' is invalid with any other options.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0) {
+ level != 0 || record.zi_func[0] != '\0' ||
+ record.zi_duration != 0) {
(void) fprintf(stderr, "cancel (-c) incompatible with "
"any other options\n");
usage();
@@ -652,7 +806,8 @@ main(int argc, char **argv)
* for doing injection, so handle it separately here.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
- level != 0) {
+ level != 0 || record.zi_func[0] != '\0' ||
+ record.zi_duration != 0) {
(void) fprintf(stderr, "device (-d) incompatible with "
"data error injection\n");
usage();
@@ -675,12 +830,18 @@ main(int argc, char **argv)
return (1);
}
+ record.zi_iotype = io_type;
if (translate_device(pool, device, label, &record) != 0)
return (1);
if (!error)
error = ENXIO;
+
+ if (action != VDEV_STATE_UNKNOWN)
+ return (perform_action(pool, &record, action));
+
} else if (raw != NULL) {
- if (range != NULL || type != TYPE_INVAL || level != 0) {
+ if (range != NULL || type != TYPE_INVAL || level != 0 ||
+ record.zi_func[0] != '\0' || record.zi_duration != 0) {
(void) fprintf(stderr, "raw (-b) format with "
"any other options\n");
usage();
@@ -707,10 +868,52 @@ main(int argc, char **argv)
return (1);
if (!error)
error = EIO;
+ } else if (record.zi_func[0] != '\0') {
+ if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+ level != 0 || device != NULL || record.zi_duration != 0) {
+ (void) fprintf(stderr, "panic (-p) incompatible with "
+ "other options\n");
+ usage();
+ return (2);
+ }
+
+ if (argc < 1 || argc > 2) {
+ (void) fprintf(stderr, "panic (-p) injection requires "
+ "a single pool name and an optional id\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ if (argv[1] != NULL)
+ record.zi_type = atoi(argv[1]);
+ dataset[0] = '\0';
+ } else if (record.zi_duration != 0) {
+ if (nowrites == 0) {
+ (void) fprintf(stderr, "-s or -g meaningless "
+ "without -I (ignore writes)\n");
+ usage();
+ return (2);
+ } else if (dur_secs && dur_txg) {
+ (void) fprintf(stderr, "choose a duration either "
+ "in seconds (-s) or a number of txgs (-g) "
+ "but not both\n");
+ usage();
+ return (2);
+ } else if (argc != 1) {
+ (void) fprintf(stderr, "ignore writes (-I) "
+ "injection requires a single pool name\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ dataset[0] = '\0';
} else if (type == TYPE_INVAL) {
if (flags == 0) {
(void) fprintf(stderr, "at least one of '-b', '-d', "
- "'-t', '-a', or '-u' must be specified\n");
+ "'-t', '-a', '-p', '-I' or '-u' "
+ "must be specified\n");
usage();
return (2);
}
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.h b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
index adc3efe..46fdcad 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.h
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZINJECT_H
#define _ZINJECT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
@@ -41,11 +38,13 @@ typedef enum {
TYPE_MOSDIR, /* MOS object directory */
TYPE_METASLAB, /* metaslab objects */
TYPE_CONFIG, /* MOS config */
- TYPE_BPLIST, /* block pointer list */
+ TYPE_BPOBJ, /* block pointer list */
TYPE_SPACEMAP, /* space map objects */
TYPE_ERRLOG, /* persistent error log */
TYPE_LABEL_UBERBLOCK, /* label specific uberblock */
TYPE_LABEL_NVLIST, /* label specific nvlist */
+ TYPE_LABEL_PAD1, /* label specific 8K pad1 area */
+ TYPE_LABEL_PAD2, /* label specific 8K pad2 area */
TYPE_INVAL
} err_type_t;
diff --git a/cddl/contrib/opensolaris/cmd/zlook/zlook.c b/cddl/contrib/opensolaris/cmd/zlook/zlook.c
new file mode 100644
index 0000000..29a6559
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zlook/zlook.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This is a test program that uses ioctls to the ZFS Unit Test driver
+ * to perform readdirs or lookups using flags not normally available
+ * to user-land programs. This allows testing of the flags'
+ * behavior outside of a complicated consumer, such as the SMB driver.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/dirent.h>
+#include <sys/attr.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <string.h>
+#include <time.h>
+
+#define _KERNEL
+
+#include <sys/fs/zut.h>
+#include <sys/extdirent.h>
+
+#undef _KERNEL
+
+/* Largest readdir buffer size main() accepts for -b. */
+#define MAXBUF (64 * 1024)
+/* Readdir buffer size used when -b is not given. */
+#define BIGBUF 4096
+/* Smallest readdir buffer size main() accepts: the size of one dirent_t. */
+#define LILBUF (sizeof (dirent_t))
+
+/*
+ * Number of bytes in a directory record of total length 'reclen' that lie
+ * at or beyond the start of d_name.
+ */
+#define DIRENT_NAMELEN(reclen) \
+ ((reclen) - (offsetof(dirent_t, d_name[0])))
+
+/*
+ * Print the command synopsis to stderr and terminate the process with
+ * exit status EINVAL.  'pnam' is the program name substituted into each
+ * synopsis line.  Does not return.
+ */
+static void
+usage(char *pnam)
+{
+	(void) fprintf(stderr, "Usage:\n %s -l [-is] dir-to-look-in "
+	    "file-in-dir [xfile-on-file]\n", pnam);
+	(void) fprintf(stderr, " %s -i [-ls] dir-to-look-in "
+	    "file-in-dir [xfile-on-file]\n", pnam);
+	(void) fprintf(stderr, " %s -s [-il] dir-to-look-in "
+	    "file-in-dir [xfile-on-file]\n", pnam);
+	(void) fprintf(stderr, "\t Perform a lookup\n");
+	(void) fprintf(stderr, "\t -l == lookup\n");
+	(void) fprintf(stderr, "\t -i == request FIGNORECASE\n");
+	(void) fprintf(stderr, "\t -s == request stat(2) and xvattr info\n");
+	(void) fprintf(stderr, " %s -r [-ea] [-b buffer-size-in-bytes] "
+	    "dir-to-look-in [file-in-dir]\n", pnam);
+	(void) fprintf(stderr, " %s -e [-ra] [-b buffer-size-in-bytes] "
+	    "dir-to-look-in [file-in-dir]\n", pnam);
+	(void) fprintf(stderr, " %s -a [-re] [-b buffer-size-in-bytes] "
+	    "dir-to-look-in [file-in-dir]\n", pnam);
+	(void) fprintf(stderr, "\t Perform a readdir\n");
+	(void) fprintf(stderr, "\t -r == readdir\n");
+	(void) fprintf(stderr, "\t -e == request extended entries\n");
+	(void) fprintf(stderr, "\t -a == request access filtering\n");
+	(void) fprintf(stderr, "\t -b == buffer size (default 4K)\n");
+	(void) fprintf(stderr, " %s -A path\n", pnam);
+	(void) fprintf(stderr, "\t Look up _PC_ACCESS_FILTERING "
+	    "for path with pathconf(2)\n");
+	(void) fprintf(stderr, " %s -E path\n", pnam);
+	(void) fprintf(stderr, "\t Look up _PC_SATTR_EXISTS "
+	    "for path with pathconf(2)\n");
+	(void) fprintf(stderr, " %s -S path\n", pnam);
+	/* main() maps -S to _PC_SATTR_ENABLED; -E is the EXISTS query. */
+	(void) fprintf(stderr, "\t Look up _PC_SATTR_ENABLED "
+	    "for path with pathconf(2)\n");
+	exit(EINVAL);
+}
+
+/*
+ * Print every extended directory entry held in r->zr_buf.
+ *
+ * The buffer is walked record by record for r->zr_bytes bytes, stepping
+ * by each record's ed_reclen.  Each output line is the entry name (up to
+ * its first NUL, and never beyond EDIRENT_NAMELEN(ed_reclen) bytes),
+ * padded with spaces to 16 columns, then a tab and ed_eflags in hex.
+ */
+static void
+print_extd_entries(zut_readdir_t *r)
+{
+	struct edirent *eodp;
+	char *bufstart;
+
+	eodp = (edirent_t *)(uintptr_t)r->zr_buf;
+	bufstart = (char *)eodp;
+	while ((char *)eodp < bufstart + r->zr_bytes) {
+		int namelen = 0;
+
+		/* The name may fill its field without a terminating NUL. */
+		while (namelen < EDIRENT_NAMELEN(eodp->ed_reclen) &&
+		    eodp->ed_name[namelen] != '\0')
+			namelen++;
+
+		/*
+		 * Let printf do the padding: precision bounds the name,
+		 * the left-justified width pads short names to 16 columns.
+		 * This does not depend on the length of a filler string.
+		 */
+		(void) printf("%-16.*s\t%x\n", namelen, eodp->ed_name,
+		    eodp->ed_eflags);
+		eodp = (edirent_t *)((intptr_t)eodp + eodp->ed_reclen);
+	}
+}
+
+/*
+ * Print the name of every dirent64_t record held in r->zr_buf, one per
+ * line.  Records are visited back to back for r->zr_bytes bytes, each one
+ * d_reclen bytes long; a name ends at its first NUL or after
+ * DIRENT_NAMELEN(d_reclen) bytes, whichever comes first.
+ */
+static void
+print_entries(zut_readdir_t *r)
+{
+	char *base = (char *)(intptr_t)r->zr_buf;
+	char *limit = base + r->zr_bytes;
+	dirent64_t *ent;
+
+	for (ent = (dirent64_t *)base; (char *)ent < limit;
+	    ent = (dirent64_t *)((intptr_t)ent + ent->d_reclen)) {
+		int len = 0;
+
+		while (len < DIRENT_NAMELEN(ent->d_reclen) &&
+		    ent->d_name[len] != '\0')
+			len++;
+
+		(void) printf("%.*s\n", len, ent->d_name);
+	}
+}
+
+/*
+ * Dump the contents of *sb to stdout, one labelled line per field: mode
+ * (octal), inode, link count, uid, gid, size, block size and block count,
+ * then the access, modification and change times as rendered by
+ * ctime_r().  A timestamp line is left out when ctime_r() returns NULL.
+ */
+static void
+print_stats(struct stat64 *sb)
+{
+	static const char *const labels[3] = {
+		"st_atime\t\t", "st_mtime\t\t", "st_ctime\t\t"
+	};
+	const time_t *stamps[3];
+	char timebuf[512];
+	int t;
+
+	(void) printf("st_mode\t\t\t%04lo\n", (unsigned long)sb->st_mode);
+	(void) printf("st_ino\t\t\t%llu\n", (unsigned long long)sb->st_ino);
+	(void) printf("st_nlink\t\t%lu\n", (unsigned long)sb->st_nlink);
+	(void) printf("st_uid\t\t\t%d\n", sb->st_uid);
+	(void) printf("st_gid\t\t\t%d\n", sb->st_gid);
+	(void) printf("st_size\t\t\t%lld\n", (long long)sb->st_size);
+	(void) printf("st_blksize\t\t%ld\n", (long)sb->st_blksize);
+	(void) printf("st_blocks\t\t%lld\n", (long long)sb->st_blocks);
+
+	/* Same order as the labels above. */
+	stamps[0] = &sb->st_atime;
+	stamps[1] = &sb->st_mtime;
+	stamps[2] = &sb->st_ctime;
+
+	for (t = 0; t < 3; t++) {
+		timebuf[0] = '\0';
+		if (ctime_r(stamps[t], timebuf, (int)sizeof (timebuf)) != NULL)
+			(void) printf("%s%s", labels[t], timebuf);
+	}
+}
+
+/*
+ * Print, as a comma-separated list on one line, the attr_to_name() name
+ * of every bit set in 'xvs' below bit position F_ATTR_ALL, preceded by a
+ * three-line banner.  Prints nothing at all when 'xvs' is zero.
+ */
+static void
+print_xvs(uint64_t xvs)
+{
+	uint_t pending;
+	int attr;
+
+	if (xvs == 0)
+		return;
+
+	(void) printf("-------------------\n");
+	(void) printf("Attribute bit(s) set:\n");
+	(void) printf("-------------------\n");
+
+	/* Walk the masked bits from least significant upwards. */
+	pending = xvs & ((1 << F_ATTR_ALL) - 1);
+	for (attr = 0; pending != 0; attr++, pending >>= 1) {
+		if ((pending & 1) == 0)
+			continue;
+
+		(void) printf("%s", attr_to_name((f_attr_t)attr));
+		/* A separator only if some higher bit is still to come. */
+		if ((pending >> 1) != 0)
+			(void) printf(", ");
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Entry point.  The options select exactly one of three modes; any other
+ * combination prints the usage text and exits:
+ *
+ *   checking (-A, -S, -E)	query pathconf(2) for one path, print the
+ *				answer, and return the pathconf() result;
+ *   reading (-r, -e, -a, -b)	issue ZUT_IOC_READDIR on ZUT_DEV until
+ *				zr_eof is set, printing each batch;
+ *   looking (-l, -i, -s)	issue one ZUT_IOC_LOOKUP on ZUT_DEV and
+ *				print the outcome.
+ *
+ * In the reading and looking modes the return value is 0 on success,
+ * ENXIO if ZUT_DEV cannot be opened, the errno of a failed malloc() or
+ * ioctl(), or the retcode reported in the ioctl result structure.
+ */
+int
+main(int argc, char **argv)
+{
+	zut_lookup_t lk = {0};
+	zut_readdir_t rd = {0};
+	boolean_t checking = B_FALSE;
+	boolean_t looking = B_FALSE;
+	boolean_t reading = B_FALSE;
+	boolean_t bflag = B_FALSE;
+	long rddir_bufsize = BIGBUF;
+	int error = 0;
+	int check = 0;
+	int fd;
+	int c;
+
+	while ((c = getopt(argc, argv, "lisaerb:ASE")) != -1) {
+		switch (c) {
+		case 'l':
+			looking = B_TRUE;
+			break;
+		case 'i':
+			lk.zl_reqflags |= ZUT_IGNORECASE;
+			looking = B_TRUE;
+			break;
+		case 's':
+			lk.zl_reqflags |= ZUT_GETSTAT;
+			looking = B_TRUE;
+			break;
+		case 'a':
+			rd.zr_reqflags |= ZUT_ACCFILTER;
+			reading = B_TRUE;
+			break;
+		case 'e':
+			rd.zr_reqflags |= ZUT_EXTRDDIR;
+			reading = B_TRUE;
+			break;
+		case 'r':
+			reading = B_TRUE;
+			break;
+		case 'b':
+			reading = B_TRUE;
+			bflag = B_TRUE;
+			rddir_bufsize = strtol(optarg, NULL, 0);
+			break;
+		case 'A':
+			checking = B_TRUE;
+			check = _PC_ACCESS_FILTERING;
+			break;
+		case 'S':
+			checking = B_TRUE;
+			check = _PC_SATTR_ENABLED;
+			break;
+		case 'E':
+			checking = B_TRUE;
+			check = _PC_SATTR_EXISTS;
+			break;
+		case '?':
+		default:
+			usage(argv[0]);		/* no return */
+		}
+	}
+
+	/* The three modes are mutually exclusive and -b implies reading. */
+	if ((checking && looking) || (checking && reading) ||
+	    (looking && reading) || (!reading && bflag) ||
+	    (!checking && !reading && !looking))
+		usage(argv[0]);		/* no return */
+
+	/*
+	 * Compare as signed values: LILBUF is a size_t, and without the
+	 * cast a negative -b argument would be converted to a huge
+	 * unsigned number and slip past the lower bound.
+	 */
+	if (rddir_bufsize < (long)LILBUF || rddir_bufsize > MAXBUF) {
+		(void) fprintf(stderr, "Sorry, buffer size "
+		    "must be >= %d and less than or equal to %d bytes.\n",
+		    (int)LILBUF, MAXBUF);
+		exit(EINVAL);
+	}
+
+	if (checking) {
+		char pathbuf[MAXPATHLEN];
+		long result;
+
+		if (argc - optind < 1)
+			usage(argv[0]);		/* no return */
+		(void) strlcpy(pathbuf, argv[optind], MAXPATHLEN);
+		result = pathconf(pathbuf, check);
+		(void) printf("pathconf(2) check for %s\n", pathbuf);
+		switch (check) {
+		case _PC_SATTR_ENABLED:
+			(void) printf("System attributes ");
+			if (result != 0)
+				(void) printf("Enabled\n");
+			else
+				(void) printf("Not enabled\n");
+			break;
+		case _PC_SATTR_EXISTS:
+			(void) printf("System attributes ");
+			if (result != 0)
+				(void) printf("Exist\n");
+			else
+				(void) printf("Do not exist\n");
+			break;
+		case _PC_ACCESS_FILTERING:
+			(void) printf("Access filtering ");
+			if (result != 0)
+				(void) printf("Available\n");
+			else
+				(void) printf("Not available\n");
+			break;
+		}
+		return (result);
+	}
+
+	if ((fd = open(ZUT_DEV, O_RDONLY)) < 0) {
+		perror(ZUT_DEV);
+		return (ENXIO);
+	}
+
+	if (reading) {
+		char *buf;
+
+		if (argc - optind < 1)
+			usage(argv[0]);		/* no return */
+
+		(void) strlcpy(rd.zr_dir, argv[optind], MAXPATHLEN);
+		if (argc - optind > 1) {
+			(void) strlcpy(rd.zr_file, argv[optind + 1],
+			    MAXNAMELEN);
+			rd.zr_reqflags |= ZUT_XATTR;
+		}
+
+		if ((buf = malloc(rddir_bufsize)) == NULL) {
+			error = errno;
+			perror("malloc");
+			(void) close(fd);
+			return (error);
+		}
+
+		rd.zr_buf = (uint64_t)(uintptr_t)buf;
+		rd.zr_buflen = rddir_bufsize;
+
+		while (!rd.zr_eof) {
+			if (ioctl(fd, ZUT_IOC_READDIR, &rd) != 0) {
+				/*
+				 * The failure reason is in errno, not in
+				 * the return value; save it before any
+				 * other call can overwrite it.
+				 */
+				int ierr = errno;
+
+				(void) fprintf(stderr,
+				    "IOCTL error: %s (%d)\n",
+				    strerror(ierr), ierr);
+				free(buf);
+				(void) close(fd);
+				return (ierr);
+			}
+			if (rd.zr_retcode) {
+				(void) fprintf(stderr,
+				    "readdir result: %s (%d)\n",
+				    strerror(rd.zr_retcode), rd.zr_retcode);
+				free(buf);
+				(void) close(fd);
+				return (rd.zr_retcode);
+			}
+			if (rd.zr_reqflags & ZUT_EXTRDDIR)
+				print_extd_entries(&rd);
+			else
+				print_entries(&rd);
+		}
+		free(buf);
+	} else {
+		if (argc - optind < 2)
+			usage(argv[0]);		/* no return */
+
+		(void) strlcpy(lk.zl_dir, argv[optind], MAXPATHLEN);
+		(void) strlcpy(lk.zl_file, argv[optind + 1], MAXNAMELEN);
+		if (argc - optind > 2) {
+			(void) strlcpy(lk.zl_xfile,
+			    argv[optind + 2], MAXNAMELEN);
+			lk.zl_reqflags |= ZUT_XATTR;
+		}
+
+		if (ioctl(fd, ZUT_IOC_LOOKUP, &lk) != 0) {
+			/* As above: report errno, not the -1 return. */
+			int ierr = errno;
+
+			(void) fprintf(stderr,
+			    "IOCTL error: %s (%d)\n",
+			    strerror(ierr), ierr);
+			(void) close(fd);
+			return (ierr);
+		}
+
+		(void) printf("\nLookup of ");
+		if (lk.zl_reqflags & ZUT_XATTR) {
+			(void) printf("extended attribute \"%s\" of ",
+			    lk.zl_xfile);
+		}
+		(void) printf("file \"%s\" ", lk.zl_file);
+		(void) printf("in directory \"%s\" ", lk.zl_dir);
+		if (lk.zl_retcode) {
+			(void) printf("failed: %s (%d)\n",
+			    strerror(lk.zl_retcode), lk.zl_retcode);
+			(void) close(fd);
+			return (lk.zl_retcode);
+		}
+
+		(void) printf("succeeded.\n");
+		if (lk.zl_reqflags & ZUT_IGNORECASE) {
+			(void) printf("----------------------------\n");
+			(void) printf("dirent flags: 0x%0x\n", lk.zl_deflags);
+			(void) printf("real name: %s\n", lk.zl_real);
+		}
+		if (lk.zl_reqflags & ZUT_GETSTAT) {
+			(void) printf("----------------------------\n");
+			print_stats(&lk.zl_statbuf);
+			print_xvs(lk.zl_xvattrs);
+		}
+	}
+
+	(void) close(fd);
+	return (0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
index b6c97c1..ff71dff 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
@@ -1,9 +1,9 @@
'\" te
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
-.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
-.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
-.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH zpool 1M "5 Mar 2009" "SunOS 5.11" "System Administration Commands"
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
+.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH zpool 1M "21 Sep 2009" "SunOS 5.11" "System Administration Commands"
.SH NAME
zpool \- configures ZFS storage pools
.SH SYNOPSIS
@@ -14,125 +14,125 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
- ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...
+\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...
.fi
.LP
.nf
-\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
+\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
.fi
.LP
.nf
-\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...
+\fBzpool clear\fR \fIpool\fR [\fIdevice\fR]
.fi
.LP
.nf
-\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
+\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
+ ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...
.fi
.LP
.nf
-\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
+\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR
.fi
.LP
.nf
-\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
+\fBzpool detach\fR \fIpool\fR \fIdevice\fR
.fi
.LP
.nf
-\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
+\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
+\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
+\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool clear\fR \fIpool\fR [\fIdevice\fR]
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
.fi
.LP
.nf
-\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR
.fi
.LP
.nf
-\fBzpool detach\fR \fIpool\fR \fIdevice\fR
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR |\fIid\fR [\fInewpool\fR]
.fi
.LP
.nf
-\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
+\fBzpool iostat\fR [\fB-T\fR u | d ] [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
.fi
.LP
.nf
-\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
+\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
+\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-p\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
- [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR
+\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
- [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR |\fIid\fR [\fInewpool\fR]
+\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...
+\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR [\fInew_device\fR]
.fi
.LP
.nf
-\fBzpool upgrade\fR
+\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool upgrade\fR \fB-v\fR
+\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR
.fi
.LP
.nf
-\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...
+\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...
+\fBzpool upgrade\fR
.fi
.LP
.nf
-\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
+\fBzpool upgrade\fR \fB-v\fR
.fi
.LP
.nf
-\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR
+\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...
.fi
.SH DESCRIPTION
@@ -141,8 +141,8 @@ zpool \- configures ZFS storage pools
The \fBzpool\fR command configures \fBZFS\fR storage pools. A storage pool is a collection of devices that provides physical storage and data replication for \fBZFS\fR datasets.
.sp
.LP
-All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
-.SS "Virtual Devices (vdevs)"
+All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
+.SS "Virtual Devices (\fBvdev\fRs)"
.sp
.LP
A "virtual device" describes a single device or a collection of devices organized according to certain performance and fault characteristics. The following virtual devices are supported:
@@ -150,18 +150,18 @@ A "virtual device" describes a single device or a collection of devices organize
.ne 2
.mk
.na
-\fBdisk\fR
+\fB\fBdisk\fR\fR
.ad
.RS 10n
.rt
-A block device, typically located under "/dev/dsk". \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
+A block device, typically located under \fB/dev/dsk\fR. \fBZFS\fR can use individual slices or partitions, though the recommended mode of operation is to use whole disks. A disk can be specified by a full path, or it can be a shorthand name (the relative portion of the path under "/dev/dsk"). A whole disk can be specified by omitting the slice or partition designation. For example, "c0t0d0" is equivalent to "/dev/dsk/c0t0d0s2". When given a whole disk, \fBZFS\fR automatically labels the disk, if necessary.
.RE
.sp
.ne 2
.mk
.na
-\fBfile\fR
+\fB\fBfile\fR\fR
.ad
.RS 10n
.rt
@@ -172,7 +172,7 @@ A regular file. The use of files as a backing store is strongly discouraged. It
.ne 2
.mk
.na
-\fBmirror\fR
+\fB\fBmirror\fR\fR
.ad
.RS 10n
.rt
@@ -183,21 +183,25 @@ A mirror of two or more devices. Data is replicated in an identical fashion acro
.ne 2
.mk
.na
-\fBraidz\fR
+\fB\fBraidz\fR\fR
+.ad
+.br
+.na
+\fB\fBraidz1\fR\fR
.ad
.br
.na
-\fBraidz1\fR
+\fB\fBraidz2\fR\fR
.ad
.br
.na
-\fBraidz2\fR
+\fB\fBraidz3\fR\fR
.ad
.RS 10n
.rt
A variation on \fBRAID-5\fR that allows for better distribution of parity and eliminates the "\fBRAID-5\fR write hole" (in which data and parity become inconsistent after a power loss). Data and parity is striped across all disks within a \fBraidz\fR group.
.sp
-A \fBraidz\fR group can have either single- or double-parity, meaning that the \fBraidz\fR group can sustain one or two failures respectively without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group and the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
+A \fBraidz\fR group can have single-, double-, or triple-parity, meaning that the \fBraidz\fR group can sustain one, two, or three failures, respectively, without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group; the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group; and the \fBraidz3\fR \fBvdev\fR type specifies a triple-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
.sp
A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand \fIP\fR device(s) failing before data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9 to help increase performance.
.RE
@@ -206,7 +210,7 @@ A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks
.ne 2
.mk
.na
-\fBspare\fR
+\fB\fBspare\fR\fR
.ad
.RS 10n
.rt
@@ -217,22 +221,22 @@ A special pseudo-\fBvdev\fR which keeps track of available hot spares for a pool
.ne 2
.mk
.na
-\fBlog\fR
+\fB\fBlog\fR\fR
.ad
.RS 10n
.rt
-A separate intent log device. If more than one log device is specified, then writes are load-balanced between devices. Log devices can be mirrored. However, \fBraidz\fR and \fBraidz2\fR are not supported for the intent log. For more information, see the "Intent Log" section.
+A separate-intent log device. If more than one log device is specified, then writes are load-balanced between devices. Log devices can be mirrored. However, \fBraidz\fR \fBvdev\fR types are not supported for the intent log. For more information, see the "Intent Log" section.
.RE
.sp
.ne 2
.mk
.na
-\fBcache\fR
+\fB\fBcache\fR\fR
.ad
.RS 10n
.rt
-A device used to cache storage pool data. A cache device cannot be mirrored or part of a \fBraidz\fR or \fBraidz2\fR configuration. For more information, see the "Cache Devices" section.
+A device used to cache storage pool data. A cache device cannot be configured as a mirror or \fBraidz\fR group. For more information, see the "Cache Devices" section.
.RE
.sp
@@ -247,7 +251,7 @@ Virtual devices are specified one at a time on the command line, separated by wh
.sp
.in +2
.nf
-\fB# zpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
+# \fBzpool create mypool mirror c0t0d0 c0t1d0 mirror c1t0d0 c1t1d0\fR
.fi
.in -2
.sp
@@ -403,7 +407,7 @@ The \fBZFS\fR Intent Log (\fBZIL\fR) satisfies \fBPOSIX\fR requirements for sync
Multiple log devices can also be specified, and they can be mirrored. See the EXAMPLES section for an example of mirroring multiple log devices.
.sp
.LP
-Log devices can be added, replaced, attached, detached, and imported and exported as part of the larger pool.
+Log devices can be added, replaced, attached, detached, and imported and exported as part of the larger pool. Mirrored log devices can be removed by specifying the top-level mirror for the log.
.SS "Cache Devices"
.sp
.LP
@@ -433,7 +437,7 @@ Each pool has several properties associated with it. Some properties are read-on
.ne 2
.mk
.na
-\fBavailable\fR
+\fB\fBavailable\fR\fR
.ad
.RS 20n
.rt
@@ -444,7 +448,7 @@ Amount of storage available within the pool. This property can also be referred
.ne 2
.mk
.na
-\fBcapacity\fR
+\fB\fBcapacity\fR\fR
.ad
.RS 20n
.rt
@@ -455,7 +459,7 @@ Percentage of pool space used. This property can also be referred to by its shor
.ne 2
.mk
.na
-\fBhealth\fR
+\fB\fBhealth\fR\fR
.ad
.RS 20n
.rt
@@ -466,7 +470,7 @@ The current health of the pool. Health can be "\fBONLINE\fR", "\fBDEGRADED\fR",
.ne 2
.mk
.na
-\fBguid\fR
+\fB\fBguid\fR\fR
.ad
.RS 20n
.rt
@@ -477,7 +481,7 @@ A unique identifier for the pool.
.ne 2
.mk
.na
-\fBsize\fR
+\fB\fBsize\fR\fR
.ad
.RS 20n
.rt
@@ -488,7 +492,7 @@ Total size of the storage pool.
.ne 2
.mk
.na
-\fBused\fR
+\fB\fBused\fR\fR
.ad
.RS 20n
.rt
@@ -514,12 +518,23 @@ Alternate root directory. If set, this directory is prepended to any mount point
.sp
.LP
-The following properties can be set at creation time and import time, and later changed with the "\fBzpool set\fR" command:
+The following properties can be set at creation time and import time, and later changed with the \fBzpool set\fR command:
.sp
.ne 2
.mk
.na
-\fB\fBautoreplace\fR=on | off\fR
+\fB\fBautoexpand\fR=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls automatic pool expansion when the underlying LUN is grown. If set to \fBon\fR, the pool will be resized according to the size of the expanded device. If the device is part of a mirror or \fBraidz\fR then all devices within that mirror/\fBraidz\fR group must be expanded before the new space is made available to the pool. The default behavior is \fBoff\fR. This property can also be referred to by its shortened column name, \fBexpand\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBautoreplace\fR=\fBon\fR | \fBoff\fR\fR
.ad
.sp .6
.RS 4n
@@ -541,7 +556,7 @@ Identifies the default bootable dataset for the root pool. This property is expe
.ne 2
.mk
.na
-\fB\fBcachefile\fR=\fIpath\fR | "none"\fR
+\fB\fBcachefile\fR=\fIpath\fR | \fBnone\fR\fR
.ad
.sp .6
.RS 4n
@@ -574,7 +589,7 @@ Controls the system behavior in the event of catastrophic pool failure. This con
.ne 2
.mk
.na
-\fBwait\fR
+\fB\fBwait\fR\fR
.ad
.RS 12n
.rt
@@ -585,7 +600,7 @@ Blocks all \fBI/O\fR access until the device connectivity is recovered and the e
.ne 2
.mk
.na
-\fBcontinue\fR
+\fB\fBcontinue\fR\fR
.ad
.RS 12n
.rt
@@ -596,7 +611,7 @@ Returns \fBEIO\fR to any new write \fBI/O\fR requests but allows reads to any of
.ne 2
.mk
.na
-\fBpanic\fR
+\fB\fBpanic\fR\fR
.ad
.RS 12n
.rt
@@ -613,7 +628,7 @@ Prints out a message to the console and generates a system crash dump.
.ad
.sp .6
.RS 4n
-Controls whether information about snapshots associated with this pool is output when "\fBzfs list\fR" is run without the \fB-t\fR option. The default value is "off".
+Controls whether information about snapshots associated with this pool is output when "\fBzfs list\fR" is run without the \fB-t\fR option. The default value is "off".
.RE
.sp
@@ -649,25 +664,19 @@ Displays a help message.
.ne 2
.mk
.na
-\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
+\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
-Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
-.sp
-The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevents a device from ever being used by \fBZFS\fR. Other uses, such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
-.sp
-The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
-.sp
-Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
+Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create" subcommand.
.sp
.ne 2
.mk
.na
\fB\fB-f\fR\fR
.ad
-.sp .6
-.RS 4n
+.RS 6n
+.rt
Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
.RE
@@ -677,57 +686,32 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
.na
\fB\fB-n\fR\fR
.ad
-.sp .6
-.RS 4n
-Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Sets the given pool properties. See the "Properties" section for a list of valid properties that can be set.
+.RS 6n
+.rt
+Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-O\fR \fIfile-system-property=value\fR\fR
-.ad
-.br
-.na
-\fB[\fB-O\fR \fIfile-system-property=value\fR] ...\fR
-.ad
-.sp .6
-.RS 4n
-Sets the given file system properties in the root file system of the pool. See the "Properties" section of \fBzfs\fR(1M) for a list of valid properties that can be set.
+Do not add a disk that is currently configured as a quorum device to a zpool. After a disk is in the pool, that disk can then be configured as a quorum device.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-R\fR \fIroot\fR\fR
+\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
.ad
.sp .6
.RS 4n
-Equivalent to "-o cachefile=none,altroot=\fIroot\fR"
-.RE
-
+Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
.sp
.ne 2
.mk
.na
-\fB\fB-m\fR \fImountpoint\fR\fR
+\fB\fB-f\fR\fR
.ad
-.sp .6
-.RS 4n
-Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR" or "\fBaltroot\fR/\fIpool\fR" if \fBaltroot\fR is specified. The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount points, see \fBzfs\fR(1M).
+.RS 6n
+.rt
+Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
.RE
.RE
@@ -736,41 +720,36 @@ Sets the mount point for the root dataset. The default mount point is "/\fIpool\
.ne 2
.mk
.na
-\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
+\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
.ad
.sp .6
.RS 4n
-Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-f\fR\fR
-.ad
-.RS 6n
-.rt
-Forces any active datasets contained within the pool to be unmounted.
-.RE
-
+Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...\fR
+\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
-Adds the specified virtual devices to the given pool. The \fIvdev\fR specification is described in the "Virtual Devices" section. The behavior of the \fB-f\fR option, and the device checks performed are described in the "zpool create" subcommand.
+Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool names "mirror", "raidz", "spare" and "log" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
+.sp
+The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevent a device from ever being used by \fBZFS\fR. Other uses, such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
+.sp
+The command also checks that the replication strategy for the pool is consistent. An attempt to combine redundant and non-redundant storage in a single pool, or to mix disks and files, results in an error unless \fB-f\fR is specified. The use of differently sized devices within a single \fBraidz\fR or mirror group is also flagged as an error unless \fB-f\fR is specified.
+.sp
+Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR". The mount point must not exist or must be empty, or else the root dataset cannot be mounted. This can be overridden with the \fB-m\fR option.
.sp
.ne 2
.mk
.na
\fB\fB-f\fR\fR
.ad
-.RS 6n
-.rt
+.sp .6
+.RS 4n
Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
.RE
@@ -780,148 +759,79 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
.na
\fB\fB-n\fR\fR
.ad
-.RS 6n
-.rt
-Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
-.RE
-
-Do not add a disk that is currently configured as a quorum device to a zpool. After a disk is in the pool, that disk can then be configured as a quorum device.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR
-.ad
.sp .6
.RS 4n
-Removes the specified device from the pool. This command currently only supports removing hot spares and cache devices. Devices that are part of a mirrored configuration can be removed using the "\fBzpool detach\fR" command. Non-redundant and \fBraidz\fR devices cannot be removed from a pool.
+Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
+\fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
.ad
.sp .6
.RS 4n
-Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-H\fR\fR
-.ad
-.RS 12n
-.rt
-Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+Sets the given pool properties. See the "Properties" section for a list of valid properties that can be set.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-o\fR \fIprops\fR\fR
+\fB\fB-O\fR \fIfile-system-property=value\fR\fR
.ad
-.RS 12n
-.rt
-Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot"
-.RE
-
-.RE
-
-.sp
-.ne 2
-.mk
+.br
.na
-\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
+\fB[\fB-O\fR \fIfile-system-property=value\fR] ...\fR
.ad
.sp .6
.RS 4n
-Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for every pool in the system is shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
-.RE
-
+Sets the given file system properties in the root file system of the pool. See the "Properties" section of \fBzfs\fR(1M) for a list of valid properties that can be set.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
+\fB\fB-R\fR \fIroot\fR\fR
.ad
.sp .6
.RS 4n
-Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the "Device Failure and Recovery" section.
-.sp
-If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-x\fR\fR
-.ad
-.RS 6n
-.rt
-Only display status for pools that are exhibiting errors or are otherwise unavailable.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-v\fR\fR
-.ad
-.RS 6n
-.rt
-Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
-.RE
-
+Equivalent to "-o cachefile=none,altroot=\fIroot\fR"
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
+\fB\fB-m\fR \fImountpoint\fR\fR
.ad
.sp .6
.RS 4n
-Brings the specified physical device online.
-.sp
-This command is not applicable to spares or cache devices.
+Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR" or "\fBaltroot\fR/\fIpool\fR" if \fBaltroot\fR is specified. The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount points, see \fBzfs\fR(1M).
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
+\fB\fBzpool destroy\fR [\fB-f\fR] \fIpool\fR\fR
.ad
.sp .6
.RS 4n
-Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
-.sp
-This command is not applicable to spares or cache devices.
+Destroys the given pool, freeing up any devices for other use. This command tries to unmount any active datasets before destroying the pool.
.sp
.ne 2
.mk
.na
-\fB\fB-t\fR\fR
+\fB\fB-f\fR\fR
.ad
.RS 6n
.rt
-Temporary. Upon reboot, the specified physical device reverts to its previous state.
+Forces any active datasets contained within the pool to be unmounted.
.RE
.RE
@@ -930,22 +840,26 @@ Temporary. Upon reboot, the specified physical device reverts to its previous st
.ne 2
.mk
.na
-\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
+\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
.ad
.sp .6
.RS 4n
-Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared.
+Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool attach\fR [\fB-f\fR] \fIpool\fR \fIdevice\fR \fInew_device\fR\fR
+\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device cannot be part of a \fBraidz\fR configuration. If \fIdevice\fR is not currently part of a mirrored configuration, \fIdevice\fR automatically transforms into a two-way mirror of \fIdevice\fR and \fInew_device\fR. If \fIdevice\fR is part of a two-way mirror, attaching \fInew_device\fR creates a three-way mirror, and so on. In either case, \fInew_device\fR begins to resilver immediately.
+Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+.sp
+Before exporting the pool, all datasets within the pool are unmounted. A pool cannot be exported if it has a shared spare that is currently being used.
+.sp
+For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
.sp
.ne 2
.mk
@@ -954,7 +868,9 @@ Attaches \fInew_device\fR to an existing \fBzpool\fR device. The existing device
.ad
.RS 6n
.rt
-Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
+Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+.sp
+This command will forcefully export the pool even if it has a shared spare that is currently being used. This may lead to potential data corruption.
.RE
.RE
@@ -963,61 +879,54 @@ Forces use of \fInew_device\fR, even if its appears to be in use. Not all device
.ne 2
.mk
.na
-\fB\fBzpool detach\fR \fIpool\fR \fIdevice\fR\fR
+\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Detaches \fIdevice\fR from a mirror. The operation is refused if there are no other valid replicas of the data.
+Retrieves the given list of properties (or all properties if "\fBall\fR" is used) for the specified storage pool(s). These properties are displayed with the following fields:
+.sp
+.in +2
+.nf
+ name Name of storage pool
+ property Property name
+ value Property value
+ source Property source, either 'default' or 'local'.
+.fi
+.in -2
+.sp
+
+See the "Properties" section for more information on the available pool properties.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
+\fB\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...\fR
.ad
.sp .6
.RS 4n
-Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
-.sp
-The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
-.sp
-\fInew_device\fR is required if the pool is not redundant. If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
+Displays the command history of the specified pools or all pools if no pool is specified.
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fB-i\fR\fR
.ad
.RS 6n
.rt
-Forces use of \fInew_device\fR, even if its appears to be in use. Not all devices can be overridden in this manner.
-.RE
-
+Displays internally logged \fBZFS\fR events in addition to user initiated events.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
-.sp
-Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to discover silent errors due to hardware faults or disk failure.
-.sp
-Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
-.sp
-.ne 2
-.mk
-.na
-\fB\fB-s\fR\fR
+\fB\fB-l\fR\fR
.ad
.RS 6n
.rt
-Stop scrubbing.
+Displays log records in long format, which, in addition to the standard format, includes the user name, the hostname, and the zone in which the operation was performed.
.RE
.RE
@@ -1261,26 +1170,66 @@ Sets the "\fBcachefile\fR" property to "\fBnone\fR" and the "\fIaltroot\fR" prop
.ne 2
.mk
.na
-\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
+\fB\fBzpool iostat\fR [\fB-T\fR \fBu\fR | \fBd\fR] [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]\fR
.ad
.sp .6
.RS 4n
-Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for every pool in the system are shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
.sp
-Before exporting the pool, all datasets within the pool are unmounted. A pool can not be exported if it has a shared spare that is currently being used.
+.ne 2
+.mk
+.na
+\fB\fB-T\fR \fBu\fR | \fBd\fR\fR
+.ad
+.RS 12n
+.rt
+Display a time stamp.
.sp
-For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
+Specify \fBu\fR for a printed representation of the internal representation of time. See \fBtime\fR(2). Specify \fBd\fR for standard date format. See \fBdate\fR(1).
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fB-v\fR\fR
.ad
-.RS 6n
+.RS 12n
.rt
-Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wide statistics.
+.RE
+
+.RE
+
.sp
-This command will forcefully export the pool even if it has a shared spare that is currently being used. This may lead to potential data corruption.
+.ne 2
+.mk
+.na
+\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 12n
+.rt
+Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIprops\fR\fR
+.ad
+.RS 12n
+.rt
+Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot"
.RE
.RE
@@ -1289,53 +1238,109 @@ This command will forcefully export the pool even if it has a shared spare that
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR\fR
+\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
-Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with a more recent version are also displayed, although these pools will be inaccessible on the system.
+Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
+.sp
+This command is not applicable to spares or cache devices.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-t\fR\fR
+.ad
+.RS 6n
+.rt
+Temporary. Upon reboot, the specified physical device reverts to its previous state.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR \fB-v\fR\fR
+\fB\fBzpool online\fR [\fB-e\fR] \fIpool\fR \fIdevice\fR...\fR
.ad
.sp .6
.RS 4n
-Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
+Brings the specified physical device online.
+.sp
+This command is not applicable to spares or cache devices.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-e\fR\fR
+.ad
+.RS 6n
+.rt
+Expand the device to use all available space. If the device is part of a mirror or \fBraidz\fR then all devices must be expanded before the new space will become available to the pool.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...\fR
+\fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
-Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
+Removes the specified device from the pool. This command currently only supports removing hot spares, cache, and log devices. A mirrored log device can be removed by specifying the top-level mirror for the log. Non-log devices that are part of a mirrored configuration can be removed using the \fBzpool detach\fR command. Non-redundant and \fBraidz\fR devices cannot be removed from a pool.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-a\fR\fR
+\fB\fBzpool replace\fR [\fB-f\fR] \fIpool\fR \fIold_device\fR [\fInew_device\fR]\fR
.ad
-.RS 14n
+.sp .6
+.RS 4n
+Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching \fInew_device\fR, waiting for it to resilver, and then detaching \fIold_device\fR.
+.sp
+The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
+.sp
+\fInew_device\fR is required if the pool is not redundant. If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
.rt
-Upgrades all pools.
+Forces use of \fInew_device\fR, even if it appears to be in use. Not all devices can be overridden in this manner.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-V\fR \fIversion\fR\fR
+\fB\fBzpool scrub\fR [\fB-s\fR] \fIpool\fR ...\fR
.ad
-.RS 14n
+.sp .6
+.RS 4n
+Begins a scrub. The scrub examines all data in the specified pools to verify that it checksums correctly. For replicated (mirror or \fBraidz\fR) devices, \fBZFS\fR automatically repairs any damage discovered during the scrub. The "\fBzpool status\fR" command reports the progress of the scrub and summarizes the results of the scrub upon completion.
+.sp
+Scrubbing and resilvering are very similar operations. The difference is that resilvering only examines data that \fBZFS\fR knows to be out of date (for example, when attaching a new device to a mirror or replacing an existing device), whereas scrubbing examines all data to discover silent errors due to hardware faults or disk failure.
+.sp
+Because scrubbing and resilvering are \fBI/O\fR-intensive operations, \fBZFS\fR only allows one at a time. If a scrub is already in progress, the "\fBzpool scrub\fR" command terminates it and starts a new scrub. If a resilver is in progress, \fBZFS\fR does not allow a scrub to be started until the resilver completes.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-s\fR\fR
+.ad
+.RS 6n
.rt
-Upgrade to the specified version. If the \fB-V\fR flag is not specified, the pool is upgraded to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software.
+Stop scrubbing.
.RE
.RE
@@ -1344,31 +1349,44 @@ Upgrade to the specified version. If the \fB-V\fR flag is not specified, the poo
.ne 2
.mk
.na
-\fB\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...\fR
+\fB\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR\fR
.ad
.sp .6
.RS 4n
-Displays the command history of the specified pools or all pools if no pool is specified.
+Sets the given property on the specified pool. See the "Properties" section for more information on what properties can be set and acceptable values.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-i\fR\fR
+\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the "Device Failure and Recovery" section.
+.sp
+If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-x\fR\fR
.ad
.RS 6n
.rt
-Displays internally logged \fBZFS\fR events in addition to user initiated events.
+Only display status for pools that are exhibiting errors or are otherwise unavailable.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-l\fR\fR
+\fB\fB-v\fR\fR
.ad
.RS 6n
.rt
-Displays log records in long format, which in addition to standard format includes, the user name, the hostname, and the zone in which the operation was performed.
+Displays verbose data error information, printing out a complete list of all data errors since the last complete pool scrub.
.RE
.RE
@@ -1377,34 +1395,55 @@ Displays log records in long format, which in addition to standard format includ
.ne 2
.mk
.na
-\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
+\fB\fBzpool upgrade\fR\fR
.ad
.sp .6
.RS 4n
-Retrieves the given list of properties (or all properties if "\fBall\fR" is used) for the specified storage pool(s). These properties are displayed with the following fields:
-.sp
-.in +2
-.nf
- name Name of storage pool
- property Property name
- value Property value
- source Property source, either 'default' or 'local'.
-.fi
-.in -2
-.sp
+Displays all pools formatted using a different \fBZFS\fR on-disk version. Older versions can continue to be used, but some features may not be available. These pools can be upgraded using "\fBzpool upgrade -a\fR". Pools that are formatted with a more recent version are also displayed, although these pools will be inaccessible on the system.
+.RE
-See the "Properties" section for more information on the available pool properties.
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool upgrade\fR \fB-v\fR\fR
+.ad
+.sp .6
+.RS 4n
+Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR version and all previous supported versions are displayed, along with an explanation of the features provided with each version.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR\fR
+\fB\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
-Sets the given property on the specified pool. See the "Properties" section for more information on what properties can be set and acceptable values.
+Upgrades the given pool to the latest on-disk version. Once this is done, the pool will no longer be accessible on systems running older versions of the software.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-a\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrades all pools.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-V\fR \fIversion\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrade to the specified version. If the \fB-V\fR flag is not specified, the pool is upgraded to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software.
+.RE
+
.RE
.SH EXAMPLES
@@ -1417,7 +1456,7 @@ The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR t
.sp
.in +2
.nf
-\fB# zpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
+# \fBzpool create tank raidz c0t0d0 c0t1d0 c0t2d0 c0t3d0 c0t4d0 c0t5d0\fR
.fi
.in -2
.sp
@@ -1431,7 +1470,7 @@ The following command creates a pool with two mirrors, where each mirror contain
.sp
.in +2
.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
+# \fBzpool create tank mirror c0t0d0 c0t1d0 mirror c0t2d0 c0t3d0\fR
.fi
.in -2
.sp
@@ -1445,7 +1484,7 @@ The following command creates an unmirrored pool using two disk slices.
.sp
.in +2
.nf
-\fB# zpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
+# \fBzpool create tank /dev/dsk/c0t0d0s1 c0t1d0s4\fR
.fi
.in -2
.sp
@@ -1459,7 +1498,7 @@ The following command creates an unmirrored pool using files. While not recommen
.sp
.in +2
.nf
-\fB# zpool create tank /path/to/file/a /path/to/file/b\fR
+# \fBzpool create tank /path/to/file/a /path/to/file/b\fR
.fi
.in -2
.sp
@@ -1473,7 +1512,7 @@ The following command adds two mirrored disks to the pool "\fItank\fR", assuming
.sp
.in +2
.nf
-\fB# zpool add tank mirror c1t0d0 c1t1d0\fR
+# \fBzpool add tank mirror c1t0d0 c1t1d0\fR
.fi
.in -2
.sp
@@ -1491,7 +1530,7 @@ The results from this command are similar to the following:
.sp
.in +2
.nf
-\fB# zpool list\fR
+# \fBzpool list\fR
NAME SIZE USED AVAIL CAP HEALTH ALTROOT
pool 67.5G 2.92M 67.5G 0% ONLINE -
tank 67.5G 2.92M 67.5G 0% ONLINE -
@@ -1509,7 +1548,7 @@ The following command destroys the pool "\fItank\fR" and any datasets contained
.sp
.in +2
.nf
-\fB# zpool destroy -f tank\fR
+# \fBzpool destroy -f tank\fR
.fi
.in -2
.sp
@@ -1523,7 +1562,7 @@ The following command exports the devices in pool \fItank\fR so that they can be
.sp
.in +2
.nf
-\fB# zpool export tank\fR
+# \fBzpool export tank\fR
.fi
.in -2
.sp
@@ -1541,7 +1580,7 @@ The results from this command are similar to the following:
.sp
.in +2
.nf
-\fB# zpool import\fR
+# \fBzpool import\fR
pool: tank
id: 15451357997522795478
state: ONLINE
@@ -1553,7 +1592,7 @@ config:
c1t2d0 ONLINE
c1t3d0 ONLINE
-\fB# zpool import tank\fR
+# \fBzpool import tank\fR
.fi
.in -2
.sp
@@ -1567,7 +1606,7 @@ The following command upgrades all ZFS Storage pools to the current version of t
.sp
.in +2
.nf
-\fB# zpool upgrade -a\fR
+# \fBzpool upgrade -a\fR
This system is currently running ZFS version 2.
.fi
.in -2
@@ -1582,7 +1621,7 @@ The following command creates a new pool with an available hot spare:
.sp
.in +2
.nf
-\fB# zpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
+# \fBzpool create tank mirror c0t0d0 c0t1d0 spare c0t2d0\fR
.fi
.in -2
.sp
@@ -1594,7 +1633,7 @@ If one of the disks were to fail, the pool would be reduced to the degraded stat
.sp
.in +2
.nf
-\fB# zpool replace tank c0t0d0 c0t3d0\fR
+# \fBzpool replace tank c0t0d0 c0t3d0\fR
.fi
.in -2
.sp
@@ -1606,7 +1645,7 @@ Once the data has been resilvered, the spare is automatically removed and is mad
.sp
.in +2
.nf
-\fB# zpool remove tank c0t2d0\fR
+# \fBzpool remove tank c0t2d0\fR
.fi
.in -2
.sp
@@ -1620,7 +1659,7 @@ The following command creates a ZFS storage pool consisting of two, two-way mirr
.sp
.in +2
.nf
-\fB# zpool create pool mirror c0d0 c1d0 mirror c2d0 c3d0 log mirror \e
+# \fBzpool create pool mirror c0d0 c1d0 mirror c2d0 c3d0 log mirror \e
c4d0 c5d0\fR
.fi
.in -2
@@ -1635,7 +1674,7 @@ The following command adds two disks for use as cache devices to a ZFS storage p
.sp
.in +2
.nf
-\fB# zpool add pool cache c2d0 c3d0\fR
+# \fBzpool add pool cache c2d0 c3d0\fR
.fi
.in -2
.sp
@@ -1643,10 +1682,57 @@ The following command adds two disks for use as cache devices to a ZFS storage p
.sp
.LP
Once added, the cache devices gradually fill with content from main memory. Depending on the size of your cache devices, it could take over an hour for them to fill. Capacity and reads can be monitored using the \fBiostat\fR option as follows:
+
+.sp
+.in +2
+.nf
+# \fBzpool iostat -v pool 5\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 14 \fRRemoving a Mirrored Log Device
+.sp
+.LP
+The following command removes the mirrored log device \fBmirror-2\fR.
+
+.sp
+.LP
+Given this configuration:
+
+.sp
+.in +2
+.nf
+ pool: tank
+ state: ONLINE
+ scrub: none requested
+config:
+
+ NAME STATE READ WRITE CKSUM
+ tank ONLINE 0 0 0
+ mirror-0 ONLINE 0 0 0
+ c6t0d0 ONLINE 0 0 0
+ c6t1d0 ONLINE 0 0 0
+ mirror-1 ONLINE 0 0 0
+ c6t2d0 ONLINE 0 0 0
+ c6t3d0 ONLINE 0 0 0
+ logs
+ mirror-2 ONLINE 0 0 0
+ c4t0d0 ONLINE 0 0 0
+ c4t1d0 ONLINE 0 0 0
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+The command to remove the mirrored log \fBmirror-2\fR is:
+
.sp
.in +2
.nf
-\fB# zpool iostat -v pool 5\fR
+# \fBzpool remove tank mirror-2\fR
.fi
.in -2
.sp
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
index 09cba89..73e40ec 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <solaris.h>
@@ -44,7 +43,6 @@
#include <zone.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
-
#include <sys/stat.h>
#include <libzfs.h>
@@ -52,6 +50,8 @@
#include "zpool_util.h"
#include "zfs_comutil.h"
+#include "statcommon.h"
+
static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **);
@@ -69,6 +69,7 @@ static int zpool_do_clear(int, char **);
static int zpool_do_attach(int, char **);
static int zpool_do_detach(int, char **);
static int zpool_do_replace(int, char **);
+static int zpool_do_split(int, char **);
static int zpool_do_scrub(int, char **);
@@ -121,7 +122,8 @@ typedef enum {
HELP_STATUS,
HELP_UPGRADE,
HELP_GET,
- HELP_SET
+ HELP_SET,
+ HELP_SPLIT
} zpool_help_t;
@@ -158,6 +160,7 @@ static zpool_command_t command_table[] = {
{ "attach", zpool_do_attach, HELP_ATTACH },
{ "detach", zpool_do_detach, HELP_DETACH },
{ "replace", zpool_do_replace, HELP_REPLACE },
+ { "split", zpool_do_split, HELP_SPLIT },
{ NULL },
{ "scrub", zpool_do_scrub, HELP_SCRUB },
{ NULL },
@@ -175,6 +178,8 @@ static zpool_command_t command_table[] = {
zpool_command_t *current_command;
static char history_str[HIS_MAX_RECORD_LEN];
+static uint_t timestamp_fmt = NODATE;
+
static const char *
get_usage(zpool_help_t idx) {
switch (idx) {
@@ -184,7 +189,7 @@ get_usage(zpool_help_t idx) {
return (gettext("\tattach [-f] <pool> <device> "
"<new-device>\n"));
case HELP_CLEAR:
- return (gettext("\tclear <pool> [device]\n"));
+ return (gettext("\tclear [-nF] <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fn] [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
@@ -199,17 +204,20 @@ get_usage(zpool_help_t idx) {
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
+ "\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-R root] -a\n"
+ "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-R root] "
- "<pool | id> [newpool]\n"));
+ "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "[-R root] [-F [-n]]\n"
+ "\t <pool | id> [newpool]\n"));
case HELP_IOSTAT:
- return (gettext("\tiostat [-v] [pool] ... [interval "
+ return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval "
"[count]]\n"));
case HELP_LIST:
return (gettext("\tlist [-H] [-o property[,...]] "
- "[pool] ...\n"));
+ "[-T d|u] [pool] ... [interval [count]]\n"));
case HELP_OFFLINE:
return (gettext("\toffline [-t] <pool> <device> ...\n"));
case HELP_ONLINE:
@@ -222,7 +230,8 @@ get_usage(zpool_help_t idx) {
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
- return (gettext("\tstatus [-vx] [pool] ...\n"));
+ return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval "
+ "[count]]\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade\n"
"\tupgrade -v\n"
@@ -232,6 +241,10 @@ get_usage(zpool_help_t idx) {
"<pool> ...\n"));
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
+ case HELP_SPLIT:
+ return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n"
+ "\t [-o property=value] <pool> <newpool> "
+ "[<device> ...]\n"));
}
abort();
@@ -247,12 +260,12 @@ print_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
- (void) fprintf(fp, "\t%-13s ", zpool_prop_to_name(prop));
+ (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop));
if (zpool_prop_readonly(prop))
(void) fprintf(fp, " NO ");
else
- (void) fprintf(fp, " YES ");
+ (void) fprintf(fp, " YES ");
if (zpool_prop_values(prop) == NULL)
(void) fprintf(fp, "-\n");
@@ -299,7 +312,7 @@ usage(boolean_t requested)
(void) fprintf(fp,
gettext("\nthe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-13s %s %s\n\n",
+ (void) fprintf(fp, "\n\t%-15s %s %s\n\n",
"PROPERTY", "EDIT", "VALUES");
/* Iterate over all properties */
@@ -341,7 +354,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent,
if ((is_log && !print_logs) || (!is_log && print_logs))
continue;
- vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
print_vdev_tree(zhp, vname, child[c], indent + 2,
B_FALSE);
free(vname);
@@ -509,11 +522,10 @@ zpool_do_add(int argc, char **argv)
}
/*
- * zpool remove <pool> <vdev> ...
+ * zpool remove <pool> <vdev> ...
*
- * Removes the given vdev from the pool. Currently, this only supports removing
- * spares and cache devices from the pool. Eventually, we'll want to support
- * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs.
+ * Removes the given vdev from the pool. Currently, this supports removing
+ * spares, cache, and log devices from the pool.
*/
int
zpool_do_remove(int argc, char **argv)
@@ -942,7 +954,7 @@ zpool_do_export(int argc, char **argv)
static int
max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
{
- char *name = zpool_vdev_name(g_zfs, zhp, nv);
+ char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE);
nvlist_t **child;
uint_t c, children;
int ret;
@@ -1034,20 +1046,21 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
{
nvlist_t **child;
uint_t c, children;
+ pool_scan_stat_t *ps = NULL;
vdev_stat_t *vs;
- char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+ char rbuf[6], wbuf[6], cbuf[6];
char *vname;
uint64_t notpresent;
spare_cbdata_t cb;
char *state;
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &c) == 0);
-
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
children = 0;
+ verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &c) == 0);
+
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
if (isspare) {
/*
@@ -1125,31 +1138,43 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("bad intent log"));
break;
+ case VDEV_AUX_EXTERNAL:
+ (void) printf(gettext("external device fault"));
+ break;
+
+ case VDEV_AUX_SPLIT_POOL:
+ (void) printf(gettext("split into new pool"));
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
}
- } else if (vs->vs_scrub_repaired != 0 && children == 0) {
- /*
- * Report bytes resilvered/repaired on leaf devices.
- */
- zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
- (void) printf(gettext(" %s %s"), repaired,
- (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
- "resilvered" : "repaired");
+ }
+
+ (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS,
+ (uint64_t **)&ps, &c);
+
+ if (ps && ps->pss_state == DSS_SCANNING &&
+ vs->vs_scan_processed != 0 && children == 0) {
+ (void) printf(gettext(" (%s)"),
+ (ps->pss_func == POOL_SCAN_RESILVER) ?
+ "resilvering" : "repairing");
}
(void) printf("\n");
for (c = 0; c < children; c++) {
- uint64_t is_log = B_FALSE;
+ uint64_t islog = B_FALSE, ishole = B_FALSE;
- /* Don't print logs here */
+ /* Don't print logs or holes here */
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
- &is_log);
- if (is_log)
+ &islog);
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+ &ishole);
+ if (islog || ishole)
continue;
- vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
print_status_config(zhp, vname, child[c],
namewidth, depth + 2, isspare);
free(vname);
@@ -1170,10 +1195,11 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
char *type, *vname;
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
- if (strcmp(type, VDEV_TYPE_MISSING) == 0)
+ if (strcmp(type, VDEV_TYPE_MISSING) == 0 ||
+ strcmp(type, VDEV_TYPE_HOLE) == 0)
return;
- verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
@@ -1222,7 +1248,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
if (is_log)
continue;
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE);
print_import_config(vname, child[c], namewidth, depth + 2);
free(vname);
}
@@ -1231,7 +1257,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
&child, &children) == 0) {
(void) printf(gettext("\tcache\n"));
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
(void) printf("\t %s\n", vname);
free(vname);
}
@@ -1241,7 +1267,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
&child, &children) == 0) {
(void) printf(gettext("\tspares\n"));
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
(void) printf("\t %s\n", vname);
free(vname);
}
@@ -1276,7 +1302,7 @@ print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
&is_log);
if (!is_log)
continue;
- name = zpool_vdev_name(g_zfs, zhp, child[c]);
+ name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
if (verbose)
print_status_config(zhp, name, child[c], namewidth,
2, B_FALSE);
@@ -1285,6 +1311,7 @@ print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
free(name);
}
}
+
/*
* Display the status for the given pool.
*/
@@ -1311,7 +1338,7 @@ show_import(nvlist_t *config)
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) == 0);
health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
@@ -1378,6 +1405,11 @@ show_import(nvlist_t *config)
"read.\n"));
break;
+ case ZPOOL_STATUS_RESILVERING:
+ (void) printf(gettext("status: One or more devices were being "
+ "resilvered.\n"));
+ break;
+
default:
/*
* No other status can be seen when importing pools.
@@ -1471,13 +1503,12 @@ show_import(nvlist_t *config)
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
- int force, nvlist_t *props, boolean_t do_verbatim)
+ nvlist_t *props, int flags)
{
zpool_handle_t *zhp;
char *name;
uint64_t state;
uint64_t version;
- int error = 0;
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
&name) == 0);
@@ -1490,7 +1521,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
(void) fprintf(stderr, gettext("cannot import '%s': pool "
"is formatted using a newer ZFS version\n"), name);
return (1);
- } else if (state != POOL_STATE_EXPORTED && !force) {
+ } else if (state != POOL_STATE_EXPORTED &&
+ !(flags & ZFS_IMPORT_ANY_HOST)) {
uint64_t hostid;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
@@ -1524,7 +1556,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
}
- if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0)
+ if (zpool_import_props(g_zfs, config, newname, props, flags) != 0)
return (1);
if (newname != NULL)
@@ -1534,13 +1566,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
return (1);
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+ !(flags & ZFS_IMPORT_ONLY) &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
zpool_close(zhp);
return (1);
}
zpool_close(zhp);
- return (error);
+ return (0);
}
/*
@@ -1548,7 +1581,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
* [-d dir | -c cachefile] [-f] -a
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
- * [-d dir | -c cachefile] [-f] <pool | id> [newpool]
+ * [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
*
* -c Read pool information from a cachefile instead of searching
* devices.
@@ -1563,14 +1596,23 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
* the given root. The pool will remain exported when the machine
* is rebooted.
*
- * -f Force import, even if it appears that the pool is active.
- *
- * -F Import even in the presence of faulted vdevs. This is an
+ * -V Import even in the presence of faulted vdevs. This is an
* intentionally undocumented option for testing purposes, and
* treats the pool configuration as complete, leaving any bad
* vdevs in the FAULTED state. In other words, it does verbatim
* import.
*
+ * -f Force import, even if it appears that the pool is active.
+ *
+ * -F Attempt rewind if necessary.
+ *
+ * -n See if rewind would work, but don't actually rewind.
+ *
+ * -N Import the pool but don't mount datasets.
+ *
+ * -T Specify a starting txg to use for import. This option is
+ * intentionally undocumented and is meant for testing purposes.
+ *
* -a Import all pools found.
*
* -o Set property=value and/or temporary mount options (without '=').
@@ -1584,26 +1626,32 @@ zpool_do_import(int argc, char **argv)
char **searchdirs = NULL;
int nsearch = 0;
int c;
- int err;
+ int err = 0;
nvlist_t *pools = NULL;
boolean_t do_all = B_FALSE;
boolean_t do_destroyed = B_FALSE;
char *mntopts = NULL;
- boolean_t do_force = B_FALSE;
nvpair_t *elem;
nvlist_t *config;
uint64_t searchguid = 0;
char *searchname = NULL;
char *propval;
nvlist_t *found_config;
+ nvlist_t *policy = NULL;
nvlist_t *props = NULL;
boolean_t first;
- boolean_t do_verbatim = B_FALSE;
- uint64_t pool_state;
+ int flags = ZFS_IMPORT_NORMAL;
+ uint32_t rewind_policy = ZPOOL_NO_REWIND;
+ boolean_t dryrun = B_FALSE;
+ boolean_t do_rewind = B_FALSE;
+ boolean_t xtreme_rewind = B_FALSE;
+ uint64_t pool_state, txg = -1ULL;
char *cachefile = NULL;
+ importargs_t idata = { 0 };
+ char *endptr;
/* check options */
- while ((c = getopt(argc, argv, ":ac:d:DfFo:p:R:")) != -1) {
+ while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -1628,10 +1676,19 @@ zpool_do_import(int argc, char **argv)
do_destroyed = B_TRUE;
break;
case 'f':
- do_force = B_TRUE;
+ flags |= ZFS_IMPORT_ANY_HOST;
break;
case 'F':
- do_verbatim = B_TRUE;
+ do_rewind = B_TRUE;
+ break;
+ case 'm':
+ flags |= ZFS_IMPORT_MISSING_LOG;
+ break;
+ case 'n':
+ dryrun = B_TRUE;
+ break;
+ case 'N':
+ flags |= ZFS_IMPORT_ONLY;
break;
case 'o':
if ((propval = strchr(optarg, '=')) != NULL) {
@@ -1656,6 +1713,22 @@ zpool_do_import(int argc, char **argv)
ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
goto error;
break;
+ case 'T':
+ errno = 0;
+ txg = strtoull(optarg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0') {
+ (void) fprintf(stderr,
+ gettext("invalid txg value\n"));
+ usage(B_FALSE);
+ }
+ rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
+ break;
+ case 'V':
+ flags |= ZFS_IMPORT_VERBATIM;
+ break;
+ case 'X':
+ xtreme_rewind = B_TRUE;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -1676,6 +1749,24 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE);
}
+ if ((dryrun || xtreme_rewind) && !do_rewind) {
+ (void) fprintf(stderr,
+ gettext("-n or -X only meaningful with -F\n"));
+ usage(B_FALSE);
+ }
+ if (dryrun)
+ rewind_policy = ZPOOL_TRY_REWIND;
+ else if (do_rewind)
+ rewind_policy = ZPOOL_DO_REWIND;
+ if (xtreme_rewind)
+ rewind_policy |= ZPOOL_EXTREME_REWIND;
+
+ /* In the future, we can capture further policy and include it here */
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, txg) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0)
+ goto error;
+
if (searchdirs == NULL) {
searchdirs = safe_malloc(sizeof (char *));
searchdirs[0] = "/dev/dsk";
@@ -1703,6 +1794,7 @@ zpool_do_import(int argc, char **argv)
(void) fprintf(stderr, gettext("cannot "
"discover pools: permission denied\n"));
free(searchdirs);
+ nvlist_free(policy);
return (1);
}
}
@@ -1728,28 +1820,49 @@ zpool_do_import(int argc, char **argv)
if (errno != 0 || *endptr != '\0')
searchname = argv[0];
found_config = NULL;
- }
- if (cachefile) {
- pools = zpool_find_import_cached(g_zfs, cachefile, searchname,
- searchguid);
- } else if (searchname != NULL) {
- pools = zpool_find_import_byname(g_zfs, nsearch, searchdirs,
- searchname);
- } else {
/*
- * It's OK to search by guid even if searchguid is 0.
+ * User specified a name or guid. Ensure it's unique.
*/
- pools = zpool_find_import_byguid(g_zfs, nsearch, searchdirs,
- searchguid);
- }
-
- if (pools == NULL) {
+ idata.unique = B_TRUE;
+ }
+
+
+ idata.path = searchdirs;
+ idata.paths = nsearch;
+ idata.poolname = searchname;
+ idata.guid = searchguid;
+ idata.cachefile = cachefile;
+
+ pools = zpool_search_import(g_zfs, &idata);
+
+ if (pools != NULL && idata.exists &&
+ (argc == 1 || strcmp(argv[0], argv[1]) == 0)) {
+ (void) fprintf(stderr, gettext("cannot import '%s': "
+ "a pool with that name already exists\n"),
+ argv[0]);
+ (void) fprintf(stderr, gettext("use the form '%s "
+ "<pool | id> <newpool>' to give it a new name\n"),
+ "zpool import");
+ err = 1;
+ } else if (pools == NULL && idata.exists) {
+ (void) fprintf(stderr, gettext("cannot import '%s': "
+ "a pool with that name is already created/imported,\n"),
+ argv[0]);
+ (void) fprintf(stderr, gettext("and no additional pools "
+ "with that name were found\n"));
+ err = 1;
+ } else if (pools == NULL) {
if (argc != 0) {
(void) fprintf(stderr, gettext("cannot import '%s': "
"no such pool available\n"), argv[0]);
}
+ err = 1;
+ }
+
+ if (err == 1) {
free(searchdirs);
+ nvlist_free(policy);
return (1);
}
@@ -1773,17 +1886,21 @@ zpool_do_import(int argc, char **argv)
if (do_destroyed && pool_state != POOL_STATE_DESTROYED)
continue;
+ verify(nvlist_add_nvlist(config, ZPOOL_REWIND_POLICY,
+ policy) == 0);
+
if (argc == 0) {
if (first)
first = B_FALSE;
else if (!do_all)
(void) printf("\n");
- if (do_all)
+ if (do_all) {
err |= do_import(config, NULL, mntopts,
- do_force, props, do_verbatim);
- else
+ props, flags);
+ } else {
show_import(config);
+ }
} else if (searchname != NULL) {
char *name;
@@ -1829,7 +1946,7 @@ zpool_do_import(int argc, char **argv)
err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, do_force, props, do_verbatim);
+ argv[1], mntopts, props, flags);
}
}
@@ -1844,6 +1961,7 @@ zpool_do_import(int argc, char **argv)
error:
nvlist_free(props);
nvlist_free(pools);
+ nvlist_free(policy);
free(searchdirs);
return (err ? 1 : 0);
@@ -1871,7 +1989,7 @@ print_iostat_header(iostat_cbdata_t *cb)
{
(void) printf("%*s capacity operations bandwidth\n",
cb->cb_namewidth, "");
- (void) printf("%-*s used avail read write read write\n",
+ (void) printf("%-*s alloc free read write read write\n",
cb->cb_namewidth, "pool");
print_iostat_separator(cb);
}
@@ -1906,13 +2024,13 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
char *vname;
if (oldnv != NULL) {
- verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS,
- (uint64_t **)&oldvs, &c) == 0);
+ verify(nvlist_lookup_uint64_array(oldnv,
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0);
} else {
oldvs = &zerovs;
}
- verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&newvs, &c) == 0);
if (strlen(name) + depth > cb->cb_namewidth)
@@ -1962,7 +2080,13 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
return;
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+ uint64_t ishole = B_FALSE;
+
+ if (nvlist_lookup_uint64(newchild[c],
+ ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole)
+ continue;
+
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE);
print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
newchild[c], cb, depth + 2);
free(vname);
@@ -1983,7 +2107,8 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
(void) printf("%-*s - - - - - "
"-\n", cb->cb_namewidth, "cache");
for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
+ B_FALSE);
print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
newchild[c], cb, depth + 2);
free(vname);
@@ -2072,42 +2197,14 @@ get_namewidth(zpool_handle_t *zhp, void *data)
}
/*
- * zpool iostat [-v] [pool] ... [interval [count]]
- *
- * -v Display statistics for individual vdevs
- *
- * This command can be tricky because we want to be able to deal with pool
- * creation/destruction as well as vdev configuration changes. The bulk of this
- * processing is handled by the pool_list_* routines in zpool_iter.c. We rely
- * on pool_list_update() to detect the addition of new pools. Configuration
- * changes are all handled within libzfs.
+ * Parse the input string, get the 'interval' and 'count' value if there is one.
*/
-int
-zpool_do_iostat(int argc, char **argv)
+static void
+get_interval_count(int *argcp, char **argv, unsigned long *iv,
+ unsigned long *cnt)
{
- int c;
- int ret;
- int npools;
unsigned long interval = 0, count = 0;
- zpool_list_t *list;
- boolean_t verbose = B_FALSE;
- iostat_cbdata_t cb;
-
- /* check options */
- while ((c = getopt(argc, argv, "v")) != -1) {
- switch (c) {
- case 'v':
- verbose = B_TRUE;
- break;
- case '?':
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- optopt);
- usage(B_FALSE);
- }
- }
-
- argc -= optind;
- argv += optind;
+ int argc = *argcp, errno;
/*
* Determine if the last argument is an integer or a pool name
@@ -2124,7 +2221,6 @@ zpool_do_iostat(int argc, char **argv)
"cannot be zero\n"));
usage(B_FALSE);
}
-
/*
* Ignore the last parameter
*/
@@ -2141,7 +2237,7 @@ zpool_do_iostat(int argc, char **argv)
/*
* If the last argument is also an integer, then we have both a count
- * and an integer.
+ * and an interval.
*/
if (argc > 0 && isdigit(argv[argc - 1][0])) {
char *end;
@@ -2166,6 +2262,66 @@ zpool_do_iostat(int argc, char **argv)
}
}
+ *iv = interval;
+ *cnt = count;
+ *argcp = argc;
+}
+
+static void
+get_timestamp_arg(char c)
+{
+ if (c == 'u')
+ timestamp_fmt = UDATE;
+ else if (c == 'd')
+ timestamp_fmt = DDATE;
+ else
+ usage(B_FALSE);
+}
+
+/*
+ * zpool iostat [-v] [-T d|u] [pool] ... [interval [count]]
+ *
+ * -v Display statistics for individual vdevs
+ * -T Display a timestamp in date(1) or Unix format
+ *
+ * This command can be tricky because we want to be able to deal with pool
+ * creation/destruction as well as vdev configuration changes. The bulk of this
+ * processing is handled by the pool_list_* routines in zpool_iter.c. We rely
+ * on pool_list_update() to detect the addition of new pools. Configuration
+ * changes are all handled within libzfs.
+ */
+int
+zpool_do_iostat(int argc, char **argv)
+{
+ int c;
+ int ret;
+ int npools;
+ unsigned long interval = 0, count = 0;
+ zpool_list_t *list;
+ boolean_t verbose = B_FALSE;
+ iostat_cbdata_t cb;
+
+ /* check options */
+ while ((c = getopt(argc, argv, "T:v")) != -1) {
+ switch (c) {
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
+ case 'v':
+ verbose = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ get_interval_count(&argc, argv, &interval, &count);
+
/*
* Construct the list of all interesting pools.
*/
@@ -2212,6 +2368,9 @@ zpool_do_iostat(int argc, char **argv)
cb.cb_namewidth = 0;
(void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
+
/*
* If it's the first time, or verbose mode, print the header.
*/
@@ -2363,12 +2522,13 @@ list_callback(zpool_handle_t *zhp, void *data)
}
/*
- * zpool list [-H] [-o prop[,prop]*] [pool] ...
+ * zpool list [-H] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
*
* -H Scripted mode. Don't display headers, and separate properties
* by a single tab.
* -o List of properties to display. Defaults to
- * "name,size,used,available,capacity,health,altroot"
+ * "name,size,allocated,free,capacity,health,altroot"
+ * -T Display a timestamp in date(1) or Unix format
*
* List all pools in the system, whether or not they're healthy. Output space
* statistics for each one, as well as health status summary.
@@ -2380,11 +2540,12 @@ zpool_do_list(int argc, char **argv)
int ret;
list_cbdata_t cb = { 0 };
static char default_props[] =
- "name,size,used,available,capacity,health,altroot";
+ "name,size,allocated,free,capacity,dedupratio,health,altroot";
char *props = default_props;
+ unsigned long interval = 0, count = 0;
/* check options */
- while ((c = getopt(argc, argv, ":Ho:")) != -1) {
+ while ((c = getopt(argc, argv, ":Ho:T:")) != -1) {
switch (c) {
case 'H':
cb.cb_scripted = B_TRUE;
@@ -2392,6 +2553,9 @@ zpool_do_list(int argc, char **argv)
case 'o':
props = optarg;
break;
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -2407,21 +2571,37 @@ zpool_do_list(int argc, char **argv)
argc -= optind;
argv += optind;
+ get_interval_count(&argc, argv, &interval, &count);
+
if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
usage(B_FALSE);
cb.cb_first = B_TRUE;
- ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
- list_callback, &cb);
+ for (;;) {
- zprop_free_list(cb.cb_proplist);
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
- if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
- (void) printf(gettext("no pools available\n"));
- return (0);
+ ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
+ list_callback, &cb);
+
+ if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
+ (void) printf(gettext("no pools available\n"));
+ zprop_free_list(cb.cb_proplist);
+ return (0);
+ }
+
+ if (interval == 0)
+ break;
+
+ if (count != 0 && --count == 0)
+ break;
+
+ (void) sleep(interval);
}
+ zprop_free_list(cb.cb_proplist);
return (ret);
}
@@ -2436,10 +2616,10 @@ zpool_get_vdev_by_name(nvlist_t *nv, char *name)
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
- if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
- name += sizeof(_PATH_DEV)-1;
- if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV)-1) == 0)
- path += sizeof(_PATH_DEV)-1;
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ name += sizeof(_PATH_DEV) - 1;
+ if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ path += sizeof(_PATH_DEV) - 1;
if (strcmp(name, path) == 0)
return (nv);
return (NULL);
@@ -2628,6 +2808,146 @@ zpool_do_detach(int argc, char **argv)
}
/*
+ * zpool split [-n] [-o prop=val] ...
+ * [-o mntopt] ...
+ * [-R altroot] <pool> <newpool> [<device> ...]
+ *
+ * -n Do not split the pool, but display the resulting layout if
+ * it were to be split.
+ * -o Set property=value, or set mount options.
+ * -R Mount the split-off pool under an alternate root.
+ *
+ * Splits the named pool and gives it the new pool name. Devices to be split
+ * off may be listed, provided that no more than one device is specified
+ * per top-level vdev mirror. The newly split pool is left in an exported
+ * state unless -R is specified.
+ *
+ * Restrictions: the top-level of the pool must only be made up of
+ * mirrors; all devices in the pool must be healthy; no device may be
+ * undergoing a resilvering operation.
+ */
+int
+zpool_do_split(int argc, char **argv)
+{
+	char *srcpool, *newpool, *propval;
+	char *mntopts = NULL;
+	/* zero-fill so every splitflags_t member starts out cleared */
+	splitflags_t flags = { 0 };
+	int c, ret = 0;
+	zpool_handle_t *zhp;
+	nvlist_t *config, *props = NULL;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":R:no:")) != -1) {
+		switch (c) {
+		case 'R':
+			/* giving an altroot also requests the import step */
+			flags.import = B_TRUE;
+			if (add_prop_list(
+			    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg,
+			    &props, B_TRUE) != 0) {
+				if (props)
+					nvlist_free(props);
+				usage(B_FALSE);
+			}
+			break;
+		case 'n':
+			flags.dryrun = B_TRUE;
+			break;
+		case 'o':
+			/* "name=value" is a property; anything else mntopts */
+			if ((propval = strchr(optarg, '=')) != NULL) {
+				*propval = '\0';
+				propval++;
+				if (add_prop_list(optarg, propval,
+				    &props, B_TRUE) != 0) {
+					if (props)
+						nvlist_free(props);
+					usage(B_FALSE);
+				}
+			} else {
+				mntopts = optarg;
+			}
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+			break;
+		}
+	}
+
+	if (!flags.import && mntopts != NULL) {
+		(void) fprintf(stderr, gettext("setting mntopts is only "
+		    "valid when importing the pool\n"));
+		usage(B_FALSE);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("Missing pool name\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("Missing new pool name\n"));
+		usage(B_FALSE);
+	}
+
+	srcpool = argv[0];
+	newpool = argv[1];
+
+	/* whatever remains is the optional list of devices to split off */
+	argc -= 2;
+	argv += 2;
+
+	if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) {
+		ret = 1;
+		goto out;
+	}
+
+	config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv);
+	if (config == NULL) {
+		ret = 1;
+	} else {
+		if (flags.dryrun) {
+			(void) printf(gettext("would create '%s' with the "
+			    "following layout:\n\n"), newpool);
+			print_vdev_tree(NULL, newpool, config, 0, B_FALSE);
+		}
+		nvlist_free(config);
+	}
+
+	zpool_close(zhp);
+
+	if (ret != 0 || flags.dryrun || !flags.import)
+		goto out;
+
+	/*
+	 * The split was successful.  Now we need to open the new
+	 * pool and import it.
+	 */
+	if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) {
+		ret = 1;
+		goto out;
+	}
+	if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+	    zpool_enable_datasets(zhp, mntopts, 0) != 0) {
+		ret = 1;
+		(void) fprintf(stderr, gettext("Split was successful, but "
+		    "the datasets could not all be mounted\n"));
+		(void) fprintf(stderr, gettext("Try doing '%s' with a "
+		    "different altroot\n"), "zpool import");
+	}
+	zpool_close(zhp);
+
+out:
+	/*
+	 * The property list built from -R/-o is still owned here; release
+	 * it on every exit path.
+	 * NOTE(review): assumes split_mirror_vdev() does not keep or free
+	 * 'props' -- confirm against zpool_vdev.c.
+	 */
+	if (props)
+		nvlist_free(props);
+	return (ret);
+}
+
+
+
+/*
* zpool online <pool> <device> ...
*/
int
@@ -2638,10 +2958,14 @@ zpool_do_online(int argc, char **argv)
zpool_handle_t *zhp;
int ret = 0;
vdev_state_t newstate;
+ int flags = 0;
/* check options */
- while ((c = getopt(argc, argv, "t")) != -1) {
+ while ((c = getopt(argc, argv, "et")) != -1) {
switch (c) {
+ case 'e':
+ flags |= ZFS_ONLINE_EXPAND;
+ break;
case 't':
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2669,7 +2993,7 @@ zpool_do_online(int argc, char **argv)
return (1);
for (i = 1; i < argc; i++) {
- if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) {
+ if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
if (newstate != VDEV_STATE_HEALTHY) {
(void) printf(gettext("warning: device '%s' "
"onlined, but remains in faulted state\n"),
@@ -2763,31 +3087,80 @@ zpool_do_offline(int argc, char **argv)
int
zpool_do_clear(int argc, char **argv)
{
+ int c;
int ret = 0;
+ boolean_t dryrun = B_FALSE;
+ boolean_t do_rewind = B_FALSE;
+ boolean_t xtreme_rewind = B_FALSE;
+ uint32_t rewind_policy = ZPOOL_NO_REWIND;
+ nvlist_t *policy = NULL;
zpool_handle_t *zhp;
char *pool, *device;
- if (argc < 2) {
+ /* check options */
+ while ((c = getopt(argc, argv, "FnX")) != -1) {
+ switch (c) {
+ case 'F':
+ do_rewind = B_TRUE;
+ break;
+ case 'n':
+ dryrun = B_TRUE;
+ break;
+ case 'X':
+ xtreme_rewind = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
usage(B_FALSE);
}
- if (argc > 3) {
+ if (argc > 2) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
- pool = argv[1];
- device = argc == 3 ? argv[2] : NULL;
+ if ((dryrun || xtreme_rewind) && !do_rewind) {
+ (void) fprintf(stderr,
+ gettext("-n or -X only meaningful with -F\n"));
+ usage(B_FALSE);
+ }
+ if (dryrun)
+ rewind_policy = ZPOOL_TRY_REWIND;
+ else if (do_rewind)
+ rewind_policy = ZPOOL_DO_REWIND;
+ if (xtreme_rewind)
+ rewind_policy |= ZPOOL_EXTREME_REWIND;
+
+ /* In future, further rewind policy choices can be passed along here */
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0)
+ return (1);
+
+ pool = argv[0];
+ device = argc == 2 ? argv[1] : NULL;
- if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
+ if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
+ nvlist_free(policy);
return (1);
+ }
- if (zpool_clear(zhp, device) != 0)
+ if (zpool_clear(zhp, device, policy) != 0)
ret = 1;
zpool_close(zhp);
+ nvlist_free(policy);
+
return (ret);
}
@@ -2812,7 +3185,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1);
}
- err = zpool_scrub(zhp, cb->cb_type);
+ err = zpool_scan(zhp, cb->cb_type);
return (err != 0);
}
@@ -2828,13 +3201,13 @@ zpool_do_scrub(int argc, char **argv)
int c;
scrub_cbdata_t cb;
- cb.cb_type = POOL_SCRUB_EVERYTHING;
+ cb.cb_type = POOL_SCAN_SCRUB;
/* check options */
while ((c = getopt(argc, argv, "s")) != -1) {
switch (c) {
case 's':
- cb.cb_type = POOL_SCRUB_NONE;
+ cb.cb_type = POOL_SCAN_NONE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2862,68 +3235,119 @@ typedef struct status_cbdata {
boolean_t cb_verbose;
boolean_t cb_explain;
boolean_t cb_first;
+ boolean_t cb_dedup_stats;
} status_cbdata_t;
/*
* Print out detailed scrub status.
*/
void
-print_scrub_status(nvlist_t *nvroot)
+print_scan_status(pool_scan_stat_t *ps)
{
- vdev_stat_t *vs;
- uint_t vsc;
- time_t start, end, now;
+ time_t start, end;
+ uint64_t elapsed, mins_left, hours_left;
+ uint64_t pass_exam, examined, total;
+ uint_t rate;
double fraction_done;
- uint64_t examined, total, minutes_left, minutes_taken;
- char *scrub_type;
+ char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7];
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
- (uint64_t **)&vs, &vsc) == 0);
+ (void) printf(gettext(" scan: "));
- /*
- * If there's never been a scrub, there's not much to say.
- */
- if (vs->vs_scrub_end == 0 && vs->vs_scrub_type == POOL_SCRUB_NONE) {
+ /* If there's never been a scan, there's not much to say. */
+ if (ps == NULL || ps->pss_func == POOL_SCAN_NONE ||
+ ps->pss_func >= POOL_SCAN_FUNCS) {
(void) printf(gettext("none requested\n"));
return;
}
- scrub_type = (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
- "resilver" : "scrub";
+ start = ps->pss_start_time;
+ end = ps->pss_end_time;
+ zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf));
- start = vs->vs_scrub_start;
- end = vs->vs_scrub_end;
- now = time(NULL);
- examined = vs->vs_scrub_examined;
- total = vs->vs_alloc;
-
- if (end != 0) {
- minutes_taken = (uint64_t)((end - start) / 60);
-
- (void) printf(gettext("%s %s after %lluh%um with %llu errors "
- "on %s"),
- scrub_type, vs->vs_scrub_complete ? "completed" : "stopped",
+ assert(ps->pss_func == POOL_SCAN_SCRUB ||
+ ps->pss_func == POOL_SCAN_RESILVER);
+ /*
+ * Scan is finished or canceled.
+ */
+ if (ps->pss_state == DSS_FINISHED) {
+ uint64_t minutes_taken = (end - start) / 60;
+ char *fmt;
+
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ fmt = gettext("scrub repaired %s in %lluh%um with "
+ "%llu errors on %s");
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ fmt = gettext("resilvered %s in %lluh%um with "
+ "%llu errors on %s");
+ }
+ /* LINTED */
+ (void) printf(fmt, processed_buf,
(u_longlong_t)(minutes_taken / 60),
(uint_t)(minutes_taken % 60),
- (u_longlong_t)vs->vs_scrub_errors, ctime(&end));
+ (u_longlong_t)ps->pss_errors,
+ ctime((time_t *)&end));
+ return;
+ } else if (ps->pss_state == DSS_CANCELED) {
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext("scrub canceled on %s"),
+ ctime(&end));
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext("resilver canceled on %s"),
+ ctime(&end));
+ }
return;
}
- if (examined == 0)
- examined = 1;
- if (examined > total)
- total = examined;
+ assert(ps->pss_state == DSS_SCANNING);
+
+ /*
+ * Scan is in progress.
+ */
+ if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext("scrub in progress since %s"),
+ ctime(&start));
+ } else if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext("resilver in progress since %s"),
+ ctime(&start));
+ }
+ examined = ps->pss_examined ? ps->pss_examined : 1;
+ total = ps->pss_to_examine;
fraction_done = (double)examined / total;
- minutes_left = (uint64_t)((now - start) *
- (1 - fraction_done) / fraction_done / 60);
- minutes_taken = (uint64_t)((now - start) / 60);
- (void) printf(gettext("%s in progress for %lluh%um, %.2f%% done, "
- "%lluh%um to go\n"),
- scrub_type, (u_longlong_t)(minutes_taken / 60),
- (uint_t)(minutes_taken % 60), 100 * fraction_done,
- (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
+ /* elapsed time for this pass */
+ elapsed = time(NULL) - ps->pss_pass_start;
+ elapsed = elapsed ? elapsed : 1;
+ pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
+ rate = pass_exam / elapsed;
+ rate = rate ? rate : 1;
+ mins_left = ((total - examined) / rate) / 60;
+ hours_left = mins_left / 60;
+
+ zfs_nicenum(examined, examined_buf, sizeof (examined_buf));
+ zfs_nicenum(total, total_buf, sizeof (total_buf));
+ zfs_nicenum(rate, rate_buf, sizeof (rate_buf));
+
+ /*
+ * do not print estimated time if hours_left is more than 30 days
+ */
+ (void) printf(gettext(" %s scanned out of %s at %s/s"),
+ examined_buf, total_buf, rate_buf);
+ if (hours_left < (30 * 24)) {
+ (void) printf(gettext(", %lluh%um to go\n"),
+ (u_longlong_t)hours_left, (uint_t)(mins_left % 60));
+ } else {
+ (void) printf(gettext(
+ ", (scan is slow, no estimated time)\n"));
+ }
+
+ if (ps->pss_func == POOL_SCAN_RESILVER) {
+ (void) printf(gettext(" %s resilvered, %.2f%% done\n"),
+ processed_buf, 100 * fraction_done);
+ } else if (ps->pss_func == POOL_SCAN_SCRUB) {
+ (void) printf(gettext(" %s repaired, %.2f%% done\n"),
+ processed_buf, 100 * fraction_done);
+ }
}
static void
@@ -2974,7 +3398,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
(void) printf(gettext("\tspares\n"));
for (i = 0; i < nspares; i++) {
- name = zpool_vdev_name(g_zfs, zhp, spares[i]);
+ name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE);
print_status_config(zhp, name, spares[i],
namewidth, 2, B_TRUE);
free(name);
@@ -2994,13 +3418,43 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
(void) printf(gettext("\tcache\n"));
for (i = 0; i < nl2cache; i++) {
- name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
+ name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE);
print_status_config(zhp, name, l2cache[i],
namewidth, 2, B_FALSE);
free(name);
}
}
+/*
+ * Print the dedup table (DDT) summary stored in a pool config: one line
+ * with the entry count and its on-disk and in-core sizes, after which the
+ * DDT stats and histogram are handed to zpool_dump_ddt().
+ */
+static void
+print_dedup_stats(nvlist_t *config)
+{
+	ddt_object_t *ddt_obj;
+	ddt_stat_t *ddt_total;
+	ddt_histogram_t *ddt_hist;
+	uint_t nelem;
+
+	/*
+	 * If the pool was faulted we may not have been able to obtain
+	 * these stats at all; and when the dedup table holds no entries
+	 * there is nothing to report either.
+	 */
+	if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS,
+	    (uint64_t **)&ddt_obj, &nelem) != 0)
+		return;
+	if (ddt_obj->ddo_count == 0)
+		return;
+
+	(void) printf("\n");
+	(void) printf("DDT entries %llu, size %llu on disk, %llu in core\n",
+	    (u_longlong_t)ddt_obj->ddo_count,
+	    (u_longlong_t)ddt_obj->ddo_dspace,
+	    (u_longlong_t)ddt_obj->ddo_mspace);
+
+	/* once the object stats exist the other two arrays are required */
+	verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
+	    (uint64_t **)&ddt_total, &nelem) == 0);
+	verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM,
+	    (uint64_t **)&ddt_hist, &nelem) == 0);
+	zpool_dump_ddt(ddt_total, ddt_hist);
+}
+
/*
* Display a summary of pool status. Displays a summary such as:
*
@@ -3053,7 +3507,7 @@ status_callback(zpool_handle_t *zhp, void *data)
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
@@ -3091,8 +3545,8 @@ status_callback(zpool_handle_t *zhp, void *data)
"be used because the label is missing \n\tor invalid. "
"There are insufficient replicas for the pool to "
"continue\n\tfunctioning.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
+ zpool_explain_recover(zpool_get_handle(zhp),
+ zpool_get_name(zhp), reason, config);
break;
case ZPOOL_STATUS_FAILING_DEV:
@@ -3116,6 +3570,16 @@ status_callback(zpool_handle_t *zhp, void *data)
"replace'.\n"));
break;
+ case ZPOOL_STATUS_REMOVED_DEV:
+ (void) printf(gettext("status: One or more devices has "
+ "been removed by the administrator.\n\tSufficient "
+ "replicas exist for the pool to continue functioning in "
+ "a\n\tdegraded state.\n"));
+ (void) printf(gettext("action: Online the device using "
+ "'zpool online' or replace the device with\n\t'zpool "
+ "replace'.\n"));
+ break;
+
case ZPOOL_STATUS_RESILVERING:
(void) printf(gettext("status: One or more devices is "
"currently being resilvered. The pool will\n\tcontinue "
@@ -3136,8 +3600,8 @@ status_callback(zpool_handle_t *zhp, void *data)
case ZPOOL_STATUS_CORRUPT_POOL:
(void) printf(gettext("status: The pool metadata is corrupted "
"and the pool cannot be opened.\n"));
- (void) printf(gettext("action: Destroy and re-create the pool "
- "from a backup source.\n"));
+ zpool_explain_recover(zpool_get_handle(zhp),
+ zpool_get_name(zhp), reason, config);
break;
case ZPOOL_STATUS_VERSION_OLDER:
@@ -3213,10 +3677,11 @@ status_callback(zpool_handle_t *zhp, void *data)
uint64_t nerr;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
+ pool_scan_stat_t *ps = NULL;
-
- (void) printf(gettext(" scrub: "));
- print_scrub_status(nvroot);
+ (void) nvlist_lookup_uint64_array(nvroot,
+ ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
+ print_scan_status(ps);
namewidth = max_width(zhp, nvroot, 0, 0);
if (namewidth < 10)
@@ -3272,6 +3737,9 @@ status_callback(zpool_handle_t *zhp, void *data)
else
print_error_log(zhp);
}
+
+ if (cbp->cb_dedup_stats)
+ print_dedup_stats(config);
} else {
(void) printf(gettext("config: The configuration cannot be "
"determined.\n"));
@@ -3281,10 +3749,12 @@ status_callback(zpool_handle_t *zhp, void *data)
}
/*
- * zpool status [-vx] [pool] ...
+ * zpool status [-vx] [-T d|u] [pool] ... [interval [count]]
*
* -v Display complete error logs
* -x Display only pools with potential problems
+ * -D Display dedup status (undocumented)
+ * -T Display a timestamp in date(1) or Unix format
*
* Describes the health status of all pools or some subset.
*/
@@ -3293,10 +3763,11 @@ zpool_do_status(int argc, char **argv)
{
int c;
int ret;
+ unsigned long interval = 0, count = 0;
status_cbdata_t cb = { 0 };
/* check options */
- while ((c = getopt(argc, argv, "vx")) != -1) {
+ while ((c = getopt(argc, argv, "vxDT:")) != -1) {
switch (c) {
case 'v':
cb.cb_verbose = B_TRUE;
@@ -3304,6 +3775,12 @@ zpool_do_status(int argc, char **argv)
case 'x':
cb.cb_explain = B_TRUE;
break;
+ case 'D':
+ cb.cb_dedup_stats = B_TRUE;
+ break;
+ case 'T':
+ get_timestamp_arg(*optarg);
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -3314,19 +3791,38 @@ zpool_do_status(int argc, char **argv)
argc -= optind;
argv += optind;
- cb.cb_first = B_TRUE;
+ get_interval_count(&argc, argv, &interval, &count);
if (argc == 0)
cb.cb_allpools = B_TRUE;
- ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb);
+ cb.cb_first = B_TRUE;
- if (argc == 0 && cb.cb_count == 0)
- (void) printf(gettext("no pools available\n"));
- else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
- (void) printf(gettext("all pools are healthy\n"));
+ for (;;) {
+ if (timestamp_fmt != NODATE)
+ print_timestamp(timestamp_fmt);
- return (ret);
+ ret = for_each_pool(argc, argv, B_TRUE, NULL,
+ status_callback, &cb);
+
+ if (argc == 0 && cb.cb_count == 0)
+ (void) printf(gettext("no pools available\n"));
+ else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
+ (void) printf(gettext("all pools are healthy\n"));
+
+ if (ret != 0)
+ return (ret);
+
+ if (interval == 0)
+ break;
+
+ if (count != 0 && --count == 0)
+ break;
+
+ (void) sleep(interval);
+ }
+
+ return (0);
}
typedef struct upgrade_cbdata {
@@ -3489,7 +3985,7 @@ zpool_do_upgrade(int argc, char **argv)
/* check options */
- while ((c = getopt(argc, argv, "avV:")) != -1) {
+ while ((c = getopt(argc, argv, ":avV:")) != -1) {
switch (c) {
case 'a':
cb.cb_all = B_TRUE;
@@ -3506,6 +4002,11 @@ zpool_do_upgrade(int argc, char **argv)
usage(B_FALSE);
}
break;
+ case ':':
+ (void) fprintf(stderr, gettext("missing argument for "
+ "'%c' option\n"), optopt);
+ usage(B_FALSE);
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -3568,11 +4069,25 @@ zpool_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 13 snapused property\n"));
(void) printf(gettext(" 14 passthrough-x aclinherit\n"));
(void) printf(gettext(" 15 user/group space accounting\n"));
- (void) printf(gettext("For more information on a particular "
- "version, including supported releases, see:\n\n"));
- (void) printf("http://www.opensolaris.org/os/community/zfs/"
- "version/N\n\n");
- (void) printf(gettext("Where 'N' is the version number.\n"));
+ (void) printf(gettext(" 16 stmf property support\n"));
+ (void) printf(gettext(" 17 Triple-parity RAID-Z\n"));
+ (void) printf(gettext(" 18 Snapshot user holds\n"));
+ (void) printf(gettext(" 19 Log device removal\n"));
+ (void) printf(gettext(" 20 Compression using zle "
+ "(zero-length encoding)\n"));
+ (void) printf(gettext(" 21 Deduplication\n"));
+ (void) printf(gettext(" 22 Received properties\n"));
+ (void) printf(gettext(" 23 Slim ZIL\n"));
+ (void) printf(gettext(" 24 System attributes\n"));
+ (void) printf(gettext(" 25 Improved scrub stats\n"));
+ (void) printf(gettext(" 26 Improved snapshot deletion "
+ "performance\n"));
+ (void) printf(gettext(" 27 Improved snapshot creation "
+ "performance\n"));
+ (void) printf(gettext(" 28 Multiple vdev replacements\n"));
+ (void) printf(gettext("\nFor more information on a particular "
+ "version, including supported releases,\n"));
+ (void) printf(gettext("see the ZFS Administration Guide.\n\n"));
} else if (argc == 0) {
int notfound;
@@ -3624,47 +4139,6 @@ typedef struct hist_cbdata {
int internal;
} hist_cbdata_t;
-char *hist_event_table[LOG_END] = {
- "invalid event",
- "pool create",
- "vdev add",
- "pool remove",
- "pool destroy",
- "pool export",
- "pool import",
- "vdev attach",
- "vdev replace",
- "vdev detach",
- "vdev online",
- "vdev offline",
- "vdev upgrade",
- "pool clear",
- "pool scrub",
- "pool property set",
- "create",
- "clone",
- "destroy",
- "destroy_begin_sync",
- "inherit",
- "property set",
- "quota set",
- "permission update",
- "permission remove",
- "permission who remove",
- "promote",
- "receive",
- "rename",
- "reservation set",
- "replay_inc_sync",
- "replay_full_sync",
- "rollback",
- "snapshot",
- "filesystem version upgrade",
- "refquota set",
- "refreservation set",
- "pool scrub done",
-};
-
/*
* Print out the command history for a specific pool.
*/
@@ -3722,7 +4196,7 @@ get_history_one(zpool_handle_t *zhp, void *data)
(void) snprintf(internalstr,
sizeof (internalstr),
"[internal %s txg:%lld] %s",
- hist_event_table[ievent], txg,
+ zfs_history_event_names[ievent], txg,
pathstr);
cmdstr = internalstr;
}
@@ -3834,7 +4308,8 @@ get_callback(zpool_handle_t *zhp, void *data)
continue;
zprop_print_one_property(zpool_get_name(zhp), cbp,
- zpool_prop_to_name(pl->pl_prop), value, srctype, NULL);
+ zpool_prop_to_name(pl->pl_prop), value, srctype, NULL,
+ NULL);
}
return (0);
}
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
index f44da4f..c7a002e 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <errno.h>
#include <libgen.h>
#include <libintl.h>
@@ -51,22 +49,6 @@ safe_malloc(size_t size)
}
/*
- * Same as above, but for strdup()
- */
-char *
-safe_strdup(const char *str)
-{
- char *ret;
-
- if ((ret = strdup(str)) == NULL) {
- (void) fprintf(stderr, "internal error: out of memory\n");
- exit(1);
- }
-
- return (ret);
-}
-
-/*
* Display an out of memory error message and abort the current program.
*/
void
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
index e82f320..134c730 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef ZPOOL_UTIL_H
@@ -37,7 +36,6 @@ extern "C" {
* Basic utility functions
*/
void *safe_malloc(size_t);
-char *safe_strdup(const char *);
void zpool_no_memory(void);
uint_t num_logs(nvlist_t *nv);
@@ -46,7 +44,9 @@ uint_t num_logs(nvlist_t *nv);
*/
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
- boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
+ boolean_t replacing, boolean_t dryrun, int argc, char **argv);
+nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
+ nvlist_t *props, splitflags_t flags, int argc, char **argv);
/*
* Pool list functions
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
index 35a636c..5ffd39a 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -66,6 +65,7 @@
#include <fcntl.h>
#include <libintl.h>
#include <libnvpair.h>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
@@ -77,6 +77,10 @@
#include "zpool_util.h"
+#define DISK_ROOT "/dev/dsk"
+#define RDISK_ROOT "/dev/rdsk"
+#define BACKUP_SLICE "s2"
+
/*
* For any given vdev specification, we can have multiple errors. The
* vdev_error() function keeps track of whether we have seen an error yet, and
@@ -107,6 +111,170 @@ vdev_error(const char *fmt, ...)
va_end(ap);
}
+#ifdef sun
+/*
+ * Print a warning on stderr that the device in-use check failed, using
+ * strerror() of the supplied error code.
+ *
+ * ENXIO and ENODEV are valid results when the device doesn't live in
+ * /dev/dsk, so nothing is printed for them.
+ */
+static void
+libdiskmgt_error(int error)
+{
+	if (error != ENXIO && error != ENODEV) {
+		(void) fprintf(stderr, gettext("warning: device in use "
+		    "checking failed: %s\n"), strerror(error));
+	}
+}
+
+/*
+ * Validate a single device or slice, passing the bulk of the work off to
+ * libdiskmgt.
+ *
+ * Returns -1 after reporting through vdev_error() when libdiskmgt says the
+ * path is in use or overlaps another slice, and 0 otherwise.  A failure of
+ * the libdiskmgt call itself is passed to libdiskmgt_error() and the path
+ * is then accepted (0).
+ */
+static int
+check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
+{
+	dm_who_type_t who = DM_WHO_ZPOOL;
+	char *msg;
+	int error = 0;
+	int inuse, overlap;
+
+	/* 'force' takes precedence over the spare-specific check */
+	if (force)
+		who = DM_WHO_ZPOOL_FORCE;
+	else if (isspare)
+		who = DM_WHO_ZPOOL_SPARE;
+
+	inuse = dm_inuse((char *)path, &msg, who, &error);
+	if (error != 0) {
+		libdiskmgt_error(error);
+		return (0);
+	}
+	if (inuse) {
+		vdev_error("%s", msg);
+		free(msg);
+		return (-1);
+	}
+
+	/*
+	 * If we're given a whole disk, ignore overlapping slices since we're
+	 * about to label it anyway.  The check is skipped under 'force' too.
+	 */
+	if (wholedisk || force)
+		return (0);
+
+	error = 0;
+	overlap = dm_isoverlapping((char *)path, &msg, &error);
+	if (error == 0) {
+		if (overlap) {
+			vdev_error(gettext("%s overlaps with %s\n"), path, msg);
+			free(msg);
+			return (-1);
+		}
+		return (0);
+	}
+
+	/* libdiskmgt's devcache only handles physical drives */
+	if (error != ENODEV)
+		libdiskmgt_error(error);
+
+	return (0);
+}
+
+
+/*
+ * Validate a whole disk.  Iterate over all slices on the disk and make sure
+ * that none is in use by calling check_slice().
+ *
+ * Returns -1 when the drive has no media or when check_slice() rejects any
+ * slice, and 0 otherwise.  A failed libdiskmgt lookup is passed to
+ * libdiskmgt_error() (when it set an error code) and the disk is then
+ * accepted (0).
+ */
+static int
+check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
+{
+	dm_descriptor_t *drive, *media, *slice;
+	int err = 0;
+	int i;
+	int ret = 0;
+
+	/*
+	 * Get the drive associated with this disk.  This should never fail,
+	 * because we already have an alias handle open for the device.
+	 */
+	drive = dm_get_associated_descriptors(disk, DM_DRIVE, &err);
+	if (drive == NULL || *drive == NULL) {
+		/* an allocated but empty list must still be released */
+		if (drive != NULL)
+			dm_free_descriptors(drive);
+		if (err)
+			libdiskmgt_error(err);
+		return (0);
+	}
+
+	/* the drive list is only needed for this one lookup */
+	media = dm_get_associated_descriptors(*drive, DM_MEDIA, &err);
+	dm_free_descriptors(drive);
+	if (media == NULL) {
+		if (err)
+			libdiskmgt_error(err);
+		return (0);
+	}
+
+	/*
+	 * It is possible that the user has specified a removable media drive,
+	 * and the media is not present.
+	 */
+	if (*media == NULL) {
+		dm_free_descriptors(media);
+		vdev_error(gettext("'%s' has no media in drive\n"), name);
+		return (-1);
+	}
+
+	slice = dm_get_associated_descriptors(*media, DM_SLICE, &err);
+	dm_free_descriptors(media);
+	if (slice == NULL) {
+		if (err)
+			libdiskmgt_error(err);
+		return (0);
+	}
+
+	/*
+	 * Iterate over all slices and report any errors.  We don't care about
+	 * overlapping slices because we are using the whole disk.
+	 */
+	for (i = 0; slice[i] != NULL; i++) {
+		/* distinct from the 'name' parameter, which is the disk */
+		char *slice_name = dm_get_name(slice[i], &err);
+
+		if (check_slice(slice_name, force, B_TRUE, isspare) != 0)
+			ret = -1;
+
+		dm_free_name(slice_name);
+	}
+
+	dm_free_descriptors(slice);
+	return (ret);
+}
+
+/*
+ * Validate a device.  If libdiskmgt knows the last path component as a
+ * disk alias, the device is checked as a whole disk via check_disk();
+ * otherwise the path is checked as a single slice via check_slice().
+ * The result of whichever check ran is returned.
+ */
+static int
+check_device(const char *path, boolean_t force, boolean_t isspare)
+{
+	dm_descriptor_t alias;
+	char *leaf;
+	int rc;
+
+	/*
+	 * For whole disks, libdiskmgt does not include the leading dev path,
+	 * so look the device up by the component after the last '/'.
+	 */
+	leaf = strrchr(path, '/');
+	assert(leaf != NULL);
+	leaf++;
+
+	alias = dm_get_descriptor_by_name(DM_ALIAS, leaf, &rc);
+	if (alias == NULL)
+		return (check_slice(path, force, B_FALSE, isspare));
+
+	rc = check_disk(path, alias, force, isspare);
+	dm_free_descriptor(alias);
+	return (rc);
+}
+#endif /* sun */
+
/*
* Check that a file is valid. All we can do in this case is check that it's
* not in use by another pool, and not in use by swap.
@@ -121,7 +289,7 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
pool_state_t state;
boolean_t inuse;
-#if 0
+#ifdef sun
if (dm_inuse_swap(file, &err)) {
if (err)
libdiskmgt_error(err);
@@ -185,7 +353,7 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
}
static int
-check_provider(const char *name, boolean_t force, boolean_t isspare)
+check_device(const char *name, boolean_t force, boolean_t isspare)
{
char path[MAXPATHLEN];
@@ -206,24 +374,44 @@ check_provider(const char *name, boolean_t force, boolean_t isspare)
* it isn't.
*/
static boolean_t
-is_whole_disk(const char *name)
+is_whole_disk(const char *arg)
{
+#ifdef sun
+ struct dk_gpt *label;
+ int fd;
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "%s%s%s",
+ RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
+ if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
+ return (B_FALSE);
+ if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+ efi_free(label);
+ (void) close(fd);
+ return (B_TRUE);
+#else
int fd;
- fd = g_open(name, 0);
+ fd = g_open(arg, 0);
if (fd >= 0) {
g_close(fd);
return (B_TRUE);
}
return (B_FALSE);
+#endif
}
/*
- * Create a leaf vdev. Determine if this is a GEOM provider.
- * Valid forms for a leaf vdev are:
+ * Create a leaf vdev. Determine if this is a file or a device. If it's a
+ * device, fill in the device id to make a complete nvlist. Valid forms for a
+ * leaf vdev are:
*
- * /dev/xxx Complete path to a GEOM provider
- * xxx Shorthand for /dev/xxx
+ * /dev/dsk/xxx Complete disk path
+ * /xxx Full path to file
+ * xxx Shorthand for /dev/dsk/xxx
*/
static nvlist_t *
make_leaf_vdev(const char *arg, uint64_t is_log)
@@ -290,10 +478,18 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
}
}
+#ifdef __FreeBSD__
+ if (S_ISCHR(statbuf.st_mode)) {
+ statbuf.st_mode &= ~S_IFCHR;
+ statbuf.st_mode |= S_IFBLK;
+ wholedisk = B_FALSE;
+ }
+#endif
+
/*
* Determine whether this is a device or a file.
*/
- if (wholedisk) {
+ if (wholedisk || S_ISBLK(statbuf.st_mode)) {
type = VDEV_TYPE_DISK;
} else if (S_ISREG(statbuf.st_mode)) {
type = VDEV_TYPE_FILE;
@@ -314,12 +510,12 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
- (uint64_t)B_FALSE) == 0);
+ (uint64_t)wholedisk) == 0);
/*
* For a whole disk, defer getting its devid until after labeling it.
*/
- if (1 || (S_ISBLK(statbuf.st_mode) && !wholedisk)) {
+ if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
/*
* Get the devid for the device.
*/
@@ -527,16 +723,14 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
*/
if ((fd = open(path, O_RDONLY)) >= 0) {
err = fstat64(fd, &statbuf);
- if (err == 0 &&
- S_ISCHR(statbuf.st_mode)) {
- err = ioctl(fd, DIOCGMEDIASIZE,
- &statbuf.st_size);
- }
(void) close(fd);
} else {
err = stat64(path, &statbuf);
}
- if (err != 0 || statbuf.st_size == 0)
+
+ if (err != 0 ||
+ statbuf.st_size == 0 ||
+ statbuf.st_size == MAXOFFSET_T)
continue;
size = statbuf.st_size;
@@ -714,6 +908,112 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
return (ret);
}
+#ifdef sun
+/*
+ * Go through and find any whole disks in the vdev specification, labelling them
+ * as appropriate. When constructing the vdev spec, we were unable to open this
+ * device in order to provide a devid. Now that we have labelled the disk and
+ * know that slice 0 is valid, we can construct the devid now.
+ *
+ * If the disk was already labeled with an EFI label, we will have gotten the
+ * devid already (because we were able to open the whole disk). Otherwise, we
+ * need to get the devid after we label the disk.
+ *
+ * The function recurses over the children, spares and l2cache arrays of 'nv'.
+ * It returns 0 on success (including when there is nothing to label) and a
+ * non-zero value as soon as labelling a disk or opening its slice 0 fails.
+ */
+static int
+make_disks(zpool_handle_t *zhp, nvlist_t *nv)
+{
+ nvlist_t **child;
+ uint_t c, children;
+ char *type, *path, *diskname;
+ char buf[MAXPATHLEN];
+ uint64_t wholedisk;
+ int fd;
+ int ret;
+ ddi_devid_t devid;
+ char *minor = NULL, *devid_str = NULL;
+
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+ /* No children array: this nvlist is a leaf vdev. */
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0) {
+
+ if (strcmp(type, VDEV_TYPE_DISK) != 0)
+ return (0);
+
+ /*
+ * We have a disk device. Get the path to the device
+ * and check the whole_disk flag stored in the vdev
+ * nvlist; a disk without the flag set needs no label.
+ */
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) != 0 || !wholedisk)
+ return (0);
+
+ /* zpool_label_disk() is given the last path component only. */
+ diskname = strrchr(path, '/');
+ assert(diskname != NULL);
+ diskname++;
+ if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
+ return (-1);
+
+ /*
+ * Fill in the devid, now that we've labeled the disk.
+ */
+ (void) snprintf(buf, sizeof (buf), "%ss0", path);
+ if ((fd = open(buf, O_RDONLY)) < 0) {
+ (void) fprintf(stderr,
+ gettext("cannot open '%s': %s\n"),
+ buf, strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Failing to obtain or encode the devid is not treated as an
+ * error: the devid entry is simply not added to the nvlist.
+ */
+ if (devid_get(fd, &devid) == 0) {
+ if (devid_get_minor_name(fd, &minor) == 0 &&
+ (devid_str = devid_str_encode(devid, minor)) !=
+ NULL) {
+ verify(nvlist_add_string(nv,
+ ZPOOL_CONFIG_DEVID, devid_str) == 0);
+ }
+ if (devid_str != NULL)
+ devid_str_free(devid_str);
+ if (minor != NULL)
+ devid_str_free(minor);
+ devid_free(devid);
+ }
+
+ /*
+ * Update the path to refer to the 's0' slice. The presence of
+ * the 'whole_disk' field indicates to the CLI that we should
+ * chop off the slice number when displaying the device in
+ * future output.
+ */
+ verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
+
+ (void) close(fd);
+
+ return (0);
+ }
+
+ /* Interior vdev: process each child, stopping at the first failure. */
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ /* Hot spares, when present, get the same treatment. */
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ /* As do level 2 cache devices. */
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = make_disks(zhp, child[c])) != 0)
+ return (ret);
+
+ return (0);
+}
+#endif /* sun */
+
/*
* Determine if the given path is a hot spare within the given configuration.
*/
@@ -742,8 +1042,8 @@ is_spare(nvlist_t *config, const char *path)
return (B_FALSE);
}
free(name);
-
(void) close(fd);
+
verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
nvlist_free(label);
@@ -767,8 +1067,8 @@ is_spare(nvlist_t *config, const char *path)
* the majority of this task.
*/
static int
-check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
- int isspare)
+check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
+ boolean_t replacing, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
@@ -789,14 +1089,22 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
* hot spare within the same pool. If so, we allow it
* regardless of what libdiskmgt or zpool_in_use() says.
*/
- if (isreplacing) {
- (void) strlcpy(buf, path, sizeof (buf));
+ if (replacing) {
+#ifdef sun
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk) == 0 && wholedisk)
+ (void) snprintf(buf, sizeof (buf), "%ss0",
+ path);
+ else
+#endif
+ (void) strlcpy(buf, path, sizeof (buf));
+
if (is_spare(config, buf))
return (0);
}
if (strcmp(type, VDEV_TYPE_DISK) == 0)
- ret = check_provider(path, force, isspare);
+ ret = check_device(path, force, isspare);
if (strcmp(type, VDEV_TYPE_FILE) == 0)
ret = check_file(path, force, isspare);
@@ -806,41 +1114,56 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_FALSE)) != 0)
+ replacing, B_FALSE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_TRUE)) != 0)
+ replacing, B_TRUE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
- isreplacing, B_FALSE)) != 0)
+ replacing, B_FALSE)) != 0)
return (ret);
return (0);
}
static const char *
-is_grouping(const char *type, int *mindev)
+is_grouping(const char *type, int *mindev, int *maxdev)
{
- if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
- if (mindev != NULL)
- *mindev = 2;
- return (VDEV_TYPE_RAIDZ);
- }
+ if (strncmp(type, "raidz", 5) == 0) {
+ const char *p = type + 5;
+ char *end;
+ long nparity;
+
+ if (*p == '\0') {
+ nparity = 1;
+ } else if (*p == '0') {
+ return (NULL); /* no zero prefixes allowed */
+ } else {
+ errno = 0;
+ nparity = strtol(p, &end, 10);
+ if (errno != 0 || nparity < 1 || nparity >= 255 ||
+ *end != '\0')
+ return (NULL);
+ }
- if (strcmp(type, "raidz2") == 0) {
if (mindev != NULL)
- *mindev = 3;
+ *mindev = nparity + 1;
+ if (maxdev != NULL)
+ *maxdev = 255;
return (VDEV_TYPE_RAIDZ);
}
+ if (maxdev != NULL)
+ *maxdev = INT_MAX;
+
if (strcmp(type, "mirror") == 0) {
if (mindev != NULL)
*mindev = 2;
@@ -878,7 +1201,7 @@ nvlist_t *
construct_spec(int argc, char **argv)
{
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
- int t, toplevels, mindev, nspares, nlogs, nl2cache;
+ int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
const char *type;
uint64_t is_log;
boolean_t seen_logs;
@@ -900,7 +1223,7 @@ construct_spec(int argc, char **argv)
* If it's a mirror or raidz, the subsequent arguments are
* its leaves -- until we encounter the next mirror or raidz.
*/
- if ((type = is_grouping(argv[0], &mindev)) != NULL) {
+ if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
nvlist_t **child = NULL;
int c, children = 0;
@@ -957,7 +1280,7 @@ construct_spec(int argc, char **argv)
}
for (c = 1; c < argc; c++) {
- if (is_grouping(argv[c], NULL) != NULL)
+ if (is_grouping(argv[c], NULL, NULL) != NULL)
break;
children++;
child = realloc(child,
@@ -977,6 +1300,13 @@ construct_spec(int argc, char **argv)
return (NULL);
}
+ if (children > maxdev) {
+ (void) fprintf(stderr, gettext("invalid vdev "
+ "specification: %s supports no more than "
+ "%d devices\n"), argv[0], maxdev);
+ return (NULL);
+ }
+
argc -= c;
argv += c;
@@ -1071,6 +1401,54 @@ construct_spec(int argc, char **argv)
return (nvroot);
}
+/*
+ * Build the (optional) vdev specification for 'zpool split' and hand it to
+ * zpool_vdev_split().
+ *
+ * When devices are named on the command line they are turned into a vdev
+ * tree, whole disks are labelled (Solaris only, and not on a dry run), and
+ * every top-level entry is checked so that a grouping keyword such as
+ * "mirror" cannot be passed off as a device.  Returns the resulting root
+ * nvlist, or NULL on any failure.
+ */
+nvlist_t *
+split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
+    splitflags_t flags, int argc, char **argv)
+{
+	nvlist_t *newroot = NULL;
+
+	if (argc > 0) {
+		nvlist_t **child;
+		uint_t children, i;
+
+		newroot = construct_spec(argc, argv);
+		if (newroot == NULL) {
+			(void) fprintf(stderr, gettext("Unable to build a "
+			    "pool from the specified devices\n"));
+			return (NULL);
+		}
+
+#ifdef sun
+		if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
+			nvlist_free(newroot);
+			return (NULL);
+		}
+#endif
+
+		/* avoid any tricks in the spec */
+		verify(nvlist_lookup_nvlist_array(newroot,
+		    ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
+		for (i = 0; i < children; i++) {
+			const char *grouping;
+			char *path;
+			int mindev, maxdev;
+
+			verify(nvlist_lookup_string(child[i],
+			    ZPOOL_CONFIG_PATH, &path) == 0);
+			grouping = is_grouping(path, &mindev, &maxdev);
+			if (grouping == NULL)
+				continue;
+
+			(void) fprintf(stderr, gettext("Cannot use "
+			    "'%s' as a device for splitting\n"), grouping);
+			nvlist_free(newroot);
+			return (NULL);
+		}
+	}
+
+	if (zpool_vdev_split(zhp, newname, &newroot, props, flags) == 0)
+		return (newroot);
+
+	/* The split failed; release whatever tree we ended up holding. */
+	if (newroot != NULL)
+		nvlist_free(newroot);
+	return (NULL);
+}
/*
* Get and validate the contents of the given vdev specification. This ensures
@@ -1084,7 +1462,7 @@ construct_spec(int argc, char **argv)
*/
nvlist_t *
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
- boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
+ boolean_t replacing, boolean_t dryrun, int argc, char **argv)
{
nvlist_t *newroot;
nvlist_t *poolconfig = NULL;
@@ -1107,8 +1485,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
* uses (such as a dedicated dump device) that even '-f' cannot
* override.
*/
- if (check_in_use(poolconfig, newroot, force, isreplacing,
- B_FALSE) != 0) {
+ if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
nvlist_free(newroot);
return (NULL);
}
@@ -1123,5 +1500,15 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
return (NULL);
}
+#ifdef sun
+ /*
+ * Run through the vdev specification and label any whole disks found.
+ */
+ if (!dryrun && make_disks(zhp, newroot) != 0) {
+ nvlist_free(newroot);
+ return (NULL);
+ }
+#endif
+
return (newroot);
}
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1 b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
new file mode 100644
index 0000000..9e11948
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.1
@@ -0,0 +1,67 @@
+'\" te
+.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
+.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH zstreamdump 1M "21 Sep 2009" "SunOS 5.11" "System Administration Commands"
+.SH NAME
+zstreamdump \- filter data in zfs send stream
+.SH SYNOPSIS
+.LP
+.nf
+\fBzstreamdump\fR [\fB-C\fR] [\fB-v\fR]
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBzstreamdump\fR utility reads from the output of the \fBzfs send\fR command, then displays headers and some statistics from that output. See \fBzfs\fR(1M).
+.SH OPTIONS
+.sp
+.LP
+The following options are supported:
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-C\fR\fR
+.ad
+.sp .6
+.RS 4n
+Suppress the validation of checksums.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.sp .6
+.RS 4n
+Verbose. Dump all headers, not only begin and end headers.
+.RE
+
+.SH ATTRIBUTES
+.sp
+.LP
+See \fBattributes\fR(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+tab(:) box;
+cw(2.75i) |cw(2.75i)
+lw(2.75i) |lw(2.75i)
+.
+ATTRIBUTE TYPE:ATTRIBUTE VALUE
+_
+Availability:SUNWzfsu
+_
+Interface Stability:Uncommitted
+.TE
+
+.SH SEE ALSO
+.sp
+.LP
+\fBzfs\fR(1M), \fBattributes\fR(5)
diff --git a/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
new file mode 100644
index 0000000..df23cc1
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
@@ -0,0 +1,429 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <libnvpair.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/dmu.h>
+#include <sys/zfs_ioctl.h>
+#include <zfs_fletcher.h>
+
+uint64_t drr_record_count[DRR_NUMTYPES];
+uint64_t total_write_size = 0;
+uint64_t total_stream_len = 0;
+FILE *send_stream = 0;
+boolean_t do_byteswap = B_FALSE;
+boolean_t do_cksum = B_TRUE;
+#define INITIAL_BUFLEN (1<<20)
+
+/*
+ * Print the command synopsis to stderr and terminate with exit status 1.
+ */
+static void
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "usage: zstreamdump [-v] [-C] < file\n"
+	    "\t -v -- verbose\n"
+	    "\t -C -- suppress checksum verification\n");
+	exit(1);
+}
+
+/*
+ * ssread - send stream read.
+ *
+ * Read exactly 'len' bytes from send_stream into 'buf' as a single item.
+ * On success the bytes are folded into the running fletcher-4 checksum
+ * (when checksumming is enabled and 'cksum' is non-NULL), the global stream
+ * length is advanced by 'len', and fread()'s item count is returned.
+ * Returns 0, with no checksum or length update, when the item could not be
+ * read in full.
+ */
+
+static size_t
+ssread(void *buf, size_t len, zio_cksum_t *cksum)
+{
+	size_t nitems = fread(buf, len, 1, send_stream);
+
+	if (nitems == 0)
+		return (0);
+
+	if (do_cksum && cksum) {
+		/* The stream's byte order selects the checksum variant. */
+		if (do_byteswap)
+			fletcher_4_incremental_byteswap(buf, len, cksum);
+		else
+			fletcher_4_incremental_native(buf, len, cksum);
+	}
+
+	total_stream_len += len;
+	return (nitems);
+}
+
+/*
+ * Make sure the scratch buffer *bufp (currently *buflenp bytes long) can
+ * hold at least 'need' bytes, replacing it with a larger allocation when it
+ * cannot.  Existing contents are not preserved.  Payload lengths come
+ * straight from the stream, so they must never be trusted to fit the
+ * current buffer.  Exits with status 1 if the memory cannot be obtained.
+ */
+static void
+ensure_buflen(char **bufp, size_t *buflenp, uint64_t need)
+{
+	if (need <= *buflenp)
+		return;
+
+	free(*bufp);
+	*bufp = NULL;
+	/* Only attempt sizes that survive the conversion to size_t. */
+	if ((uint64_t)(size_t)need == need)
+		*bufp = malloc((size_t)need);
+	if (*bufp == NULL) {
+		(void) fprintf(stderr,
+		    "cannot allocate %llu byte record buffer\n",
+		    (u_longlong_t)need);
+		exit(1);
+	}
+	*buflenp = (size_t)need;
+}
+
+/*
+ * Read a zfs send stream from standard input, print the BEGIN and END
+ * records (every record with -v), verify the stream checksums unless -C is
+ * given, and finish with per-record-type totals.
+ *
+ * Returns 0 after the summary has been printed; exits with status 1 on a
+ * usage error, when stdin is a terminal, on a bad magic number, or on an
+ * unknown record type.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *buf = NULL;
+	size_t buflen = 0;
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
+	struct drr_end *drre = &thedrr.drr_u.drr_end;
+	struct drr_object *drro = &thedrr.drr_u.drr_object;
+	struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects;
+	struct drr_write *drrw = &thedrr.drr_u.drr_write;
+	struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
+	struct drr_free *drrf = &thedrr.drr_u.drr_free;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+	int c;	/* int, not char: getopt() returns -1 at the end */
+	boolean_t verbose = B_FALSE;
+	boolean_t first = B_TRUE;
+	int err;
+	zio_cksum_t zc = { 0 };
+	zio_cksum_t pcksum = { 0 };
+
+	while ((c = getopt(argc, argv, ":vC")) != -1) {
+		switch (c) {
+		case 'C':
+			do_cksum = B_FALSE;
+			break;
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case ':':
+			(void) fprintf(stderr,
+			    "missing argument for '%c' option\n", optopt);
+			usage();
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			usage();
+			break;
+		}
+	}
+
+	if (isatty(STDIN_FILENO)) {
+		(void) fprintf(stderr,
+		    "Error: Backup stream can not be read "
+		    "from a terminal.\n"
+		    "You must redirect standard input.\n");
+		exit(1);
+	}
+
+	ensure_buflen(&buf, &buflen, INITIAL_BUFLEN);
+
+	send_stream = stdin;
+	pcksum = zc;
+	while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+
+		if (first) {
+			if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+				do_byteswap = B_TRUE;
+				if (do_cksum) {
+					ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+					/*
+					 * recalculate header checksum now
+					 * that we know it needs to be
+					 * byteswapped.
+					 */
+					fletcher_4_incremental_byteswap(drr,
+					    sizeof (dmu_replay_record_t), &zc);
+				}
+			} else if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
+				(void) fprintf(stderr, "Invalid stream "
+				    "(bad magic number)\n");
+				exit(1);
+			}
+			first = B_FALSE;
+		}
+		if (do_byteswap) {
+			drr->drr_type = BSWAP_32(drr->drr_type);
+			drr->drr_payloadlen =
+			    BSWAP_32(drr->drr_payloadlen);
+		}
+
+		/*
+		 * At this point, the leading fields of the replay record
+		 * (drr_type and drr_payloadlen) have been byte-swapped if
+		 * necessary, but the rest of the data structure (the
+		 * union of type-specific structures) is still in its
+		 * original state.
+		 */
+		if (drr->drr_type >= DRR_NUMTYPES) {
+			(void) printf("INVALID record found: type 0x%x\n",
+			    drr->drr_type);
+			(void) printf("Aborting.\n");
+			exit(1);
+		}
+
+		drr_record_count[drr->drr_type]++;
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+			if (do_byteswap) {
+				drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+				drrb->drr_versioninfo =
+				    BSWAP_64(drrb->drr_versioninfo);
+				drrb->drr_creation_time =
+				    BSWAP_64(drrb->drr_creation_time);
+				drrb->drr_type = BSWAP_32(drrb->drr_type);
+				drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+				drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+				drrb->drr_fromguid =
+				    BSWAP_64(drrb->drr_fromguid);
+			}
+
+			(void) printf("BEGIN record\n");
+			(void) printf("\thdrtype = %lld\n",
+			    (u_longlong_t)DMU_GET_STREAM_HDRTYPE(
+			    drrb->drr_versioninfo));
+			(void) printf("\tfeatures = %llx\n",
+			    (u_longlong_t)DMU_GET_FEATUREFLAGS(
+			    drrb->drr_versioninfo));
+			(void) printf("\tmagic = %llx\n",
+			    (u_longlong_t)drrb->drr_magic);
+			(void) printf("\tcreation_time = %llx\n",
+			    (u_longlong_t)drrb->drr_creation_time);
+			(void) printf("\ttype = %u\n", drrb->drr_type);
+			(void) printf("\tflags = 0x%x\n", drrb->drr_flags);
+			(void) printf("\ttoguid = %llx\n",
+			    (u_longlong_t)drrb->drr_toguid);
+			(void) printf("\tfromguid = %llx\n",
+			    (u_longlong_t)drrb->drr_fromguid);
+			(void) printf("\ttoname = %s\n", drrb->drr_toname);
+			if (verbose)
+				(void) printf("\n");
+
+			if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+			    DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) {
+				nvlist_t *nv;
+				size_t sz = drr->drr_payloadlen;
+
+				ensure_buflen(&buf, &buflen, sz);
+				if (ssread(buf, sz, &zc) == 0) {
+					/*
+					 * Nothing usable was read, so there
+					 * is no nvlist to unpack.
+					 */
+					if (ferror(send_stream))
+						perror("fread");
+					break;
+				}
+				err = nvlist_unpack(buf, sz, &nv, 0);
+				if (err != 0) {
+					/* nv is not valid on failure. */
+					(void) fprintf(stderr,
+					    "nvlist_unpack: %s\n",
+					    strerror(err));
+				} else {
+					nvlist_print(stdout, nv);
+					nvlist_free(nv);
+				}
+			}
+			break;
+
+		case DRR_END:
+			if (do_byteswap) {
+				drre->drr_checksum.zc_word[0] =
+				    BSWAP_64(drre->drr_checksum.zc_word[0]);
+				drre->drr_checksum.zc_word[1] =
+				    BSWAP_64(drre->drr_checksum.zc_word[1]);
+				drre->drr_checksum.zc_word[2] =
+				    BSWAP_64(drre->drr_checksum.zc_word[2]);
+				drre->drr_checksum.zc_word[3] =
+				    BSWAP_64(drre->drr_checksum.zc_word[3]);
+			}
+			/*
+			 * We compare against the *previous* checksum
+			 * value, because the stored checksum is of
+			 * everything before the DRR_END record.
+			 */
+			if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum,
+			    pcksum)) {
+				(void) printf("Expected checksum differs from "
+				    "checksum in stream.\n");
+				(void) printf("Expected checksum = "
+				    "%llx/%llx/%llx/%llx\n",
+				    (u_longlong_t)pcksum.zc_word[0],
+				    (u_longlong_t)pcksum.zc_word[1],
+				    (u_longlong_t)pcksum.zc_word[2],
+				    (u_longlong_t)pcksum.zc_word[3]);
+			}
+			(void) printf("END checksum = %llx/%llx/%llx/%llx\n",
+			    (u_longlong_t)drre->drr_checksum.zc_word[0],
+			    (u_longlong_t)drre->drr_checksum.zc_word[1],
+			    (u_longlong_t)drre->drr_checksum.zc_word[2],
+			    (u_longlong_t)drre->drr_checksum.zc_word[3]);
+
+			ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
+			break;
+
+		case DRR_OBJECT:
+			if (do_byteswap) {
+				drro->drr_object = BSWAP_64(drro->drr_object);
+				drro->drr_type = BSWAP_32(drro->drr_type);
+				drro->drr_bonustype =
+				    BSWAP_32(drro->drr_bonustype);
+				drro->drr_blksz = BSWAP_32(drro->drr_blksz);
+				drro->drr_bonuslen =
+				    BSWAP_32(drro->drr_bonuslen);
+				drro->drr_toguid = BSWAP_64(drro->drr_toguid);
+			}
+			if (verbose) {
+				(void) printf("OBJECT object = %llu type = %u "
+				    "bonustype = %u blksz = %u bonuslen = %u\n",
+				    (u_longlong_t)drro->drr_object,
+				    drro->drr_type,
+				    drro->drr_bonustype,
+				    drro->drr_blksz,
+				    drro->drr_bonuslen);
+			}
+			if (drro->drr_bonuslen > 0) {
+				size_t bonuslen =
+				    P2ROUNDUP(drro->drr_bonuslen, 8);
+
+				ensure_buflen(&buf, &buflen, bonuslen);
+				(void) ssread(buf, bonuslen, &zc);
+			}
+			break;
+
+		case DRR_FREEOBJECTS:
+			if (do_byteswap) {
+				drrfo->drr_firstobj =
+				    BSWAP_64(drrfo->drr_firstobj);
+				drrfo->drr_numobjs =
+				    BSWAP_64(drrfo->drr_numobjs);
+				drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid);
+			}
+			if (verbose) {
+				(void) printf("FREEOBJECTS firstobj = %llu "
+				    "numobjs = %llu\n",
+				    (u_longlong_t)drrfo->drr_firstobj,
+				    (u_longlong_t)drrfo->drr_numobjs);
+			}
+			break;
+
+		case DRR_WRITE:
+			if (do_byteswap) {
+				drrw->drr_object = BSWAP_64(drrw->drr_object);
+				drrw->drr_type = BSWAP_32(drrw->drr_type);
+				drrw->drr_offset = BSWAP_64(drrw->drr_offset);
+				drrw->drr_length = BSWAP_64(drrw->drr_length);
+				drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
+				drrw->drr_key.ddk_prop =
+				    BSWAP_64(drrw->drr_key.ddk_prop);
+			}
+			if (verbose) {
+				(void) printf("WRITE object = %llu type = %u "
+				    "checksum type = %u\n"
+				    "offset = %llu length = %llu "
+				    "props = %llx\n",
+				    (u_longlong_t)drrw->drr_object,
+				    drrw->drr_type,
+				    drrw->drr_checksumtype,
+				    (u_longlong_t)drrw->drr_offset,
+				    (u_longlong_t)drrw->drr_length,
+				    (u_longlong_t)drrw->drr_key.ddk_prop);
+			}
+			/* The data block may exceed the initial buffer. */
+			ensure_buflen(&buf, &buflen, drrw->drr_length);
+			(void) ssread(buf, drrw->drr_length, &zc);
+			total_write_size += drrw->drr_length;
+			break;
+
+		case DRR_WRITE_BYREF:
+			if (do_byteswap) {
+				drrwbr->drr_object =
+				    BSWAP_64(drrwbr->drr_object);
+				drrwbr->drr_offset =
+				    BSWAP_64(drrwbr->drr_offset);
+				drrwbr->drr_length =
+				    BSWAP_64(drrwbr->drr_length);
+				drrwbr->drr_toguid =
+				    BSWAP_64(drrwbr->drr_toguid);
+				drrwbr->drr_refguid =
+				    BSWAP_64(drrwbr->drr_refguid);
+				drrwbr->drr_refobject =
+				    BSWAP_64(drrwbr->drr_refobject);
+				drrwbr->drr_refoffset =
+				    BSWAP_64(drrwbr->drr_refoffset);
+				drrwbr->drr_key.ddk_prop =
+				    BSWAP_64(drrwbr->drr_key.ddk_prop);
+			}
+			if (verbose) {
+				(void) printf("WRITE_BYREF object = %llu "
+				    "checksum type = %u props = %llx\n"
+				    "offset = %llu length = %llu\n"
+				    "toguid = %llx refguid = %llx\n"
+				    "refobject = %llu refoffset = %llu\n",
+				    (u_longlong_t)drrwbr->drr_object,
+				    drrwbr->drr_checksumtype,
+				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
+				    (u_longlong_t)drrwbr->drr_offset,
+				    (u_longlong_t)drrwbr->drr_length,
+				    (u_longlong_t)drrwbr->drr_toguid,
+				    (u_longlong_t)drrwbr->drr_refguid,
+				    (u_longlong_t)drrwbr->drr_refobject,
+				    (u_longlong_t)drrwbr->drr_refoffset);
+			}
+			break;
+
+		case DRR_FREE:
+			if (do_byteswap) {
+				drrf->drr_object = BSWAP_64(drrf->drr_object);
+				drrf->drr_offset = BSWAP_64(drrf->drr_offset);
+				drrf->drr_length = BSWAP_64(drrf->drr_length);
+			}
+			if (verbose) {
+				(void) printf("FREE object = %llu "
+				    "offset = %llu length = %lld\n",
+				    (u_longlong_t)drrf->drr_object,
+				    (u_longlong_t)drrf->drr_offset,
+				    (longlong_t)drrf->drr_length);
+			}
+			break;
+		case DRR_SPILL:
+			if (do_byteswap) {
+				drrs->drr_object = BSWAP_64(drrs->drr_object);
+				drrs->drr_length = BSWAP_64(drrs->drr_length);
+			}
+			if (verbose) {
+				(void) printf("SPILL block for object = %llu "
+				    "length = %llu\n",
+				    (u_longlong_t)drrs->drr_object,
+				    (u_longlong_t)drrs->drr_length);
+			}
+			/* The spill block may exceed the initial buffer. */
+			ensure_buflen(&buf, &buflen, drrs->drr_length);
+			(void) ssread(buf, drrs->drr_length, &zc);
+			break;
+		}
+		pcksum = zc;
+	}
+	free(buf);
+
+	/* Print final summary */
+
+	(void) printf("SUMMARY:\n");
+	(void) printf("\tTotal DRR_BEGIN records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_BEGIN]);
+	(void) printf("\tTotal DRR_END records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_END]);
+	(void) printf("\tTotal DRR_OBJECT records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_OBJECT]);
+	(void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
+	(void) printf("\tTotal DRR_WRITE records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE]);
+	(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
+	(void) printf("\tTotal DRR_FREE records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_FREE]);
+	(void) printf("\tTotal DRR_SPILL records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_SPILL]);
+	(void) printf("\tTotal records = %lld\n",
+	    (u_longlong_t)(drr_record_count[DRR_BEGIN] +
+	    drr_record_count[DRR_OBJECT] +
+	    drr_record_count[DRR_FREEOBJECTS] +
+	    drr_record_count[DRR_WRITE] +
+	    drr_record_count[DRR_WRITE_BYREF] +
+	    drr_record_count[DRR_FREE] +
+	    drr_record_count[DRR_SPILL] +
+	    drr_record_count[DRR_END]));
+	(void) printf("\tTotal write size = %lld (0x%llx)\n",
+	    (u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
+	(void) printf("\tTotal stream length = %lld (0x%llx)\n",
+	    (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len);
+	return (0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index 3894f6b..d350230 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -86,14 +85,16 @@
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-#include <sys/zio_compress.h>
#include <sys/zil.h>
+#include <sys/zil_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
+#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
+#include <sys/dsl_scan.h>
+#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <stdio.h>
#include <stdio_ext.h>
@@ -106,6 +107,7 @@
#include <math.h>
#include <errno.h>
#include <sys/fs/zfs.h>
+#include <libnvpair.h>
static char cmdname[] = "ztest";
static char *zopt_pool = cmdname;
@@ -126,144 +128,231 @@ static int zopt_verbose = 0;
static int zopt_init = 1;
static char *zopt_dir = "/tmp";
static uint64_t zopt_time = 300; /* 5 minutes */
-static int zopt_maxfaults;
+static uint64_t zopt_maxloops = 50; /* max loops during spa_freeze() */
+
+#define BT_MAGIC 0x123456789abcdefULL
+#define MAXFAULTS() (MAX(zs->zs_mirrors, 1) * (zopt_raidz_parity + 1) - 1)
+
+enum ztest_io_type {
+ ZTEST_IO_WRITE_TAG,
+ ZTEST_IO_WRITE_PATTERN,
+ ZTEST_IO_WRITE_ZEROES,
+ ZTEST_IO_TRUNCATE,
+ ZTEST_IO_SETATTR,
+ ZTEST_IO_TYPES
+};
typedef struct ztest_block_tag {
+ uint64_t bt_magic;
uint64_t bt_objset;
uint64_t bt_object;
uint64_t bt_offset;
+ uint64_t bt_gen;
uint64_t bt_txg;
- uint64_t bt_thread;
- uint64_t bt_seq;
+ uint64_t bt_crtxg;
} ztest_block_tag_t;
-typedef struct ztest_args {
- char za_pool[MAXNAMELEN];
- spa_t *za_spa;
- objset_t *za_os;
- zilog_t *za_zilog;
- thread_t za_thread;
- uint64_t za_instance;
- uint64_t za_random;
- uint64_t za_diroff;
- uint64_t za_diroff_shared;
- uint64_t za_zil_seq;
- hrtime_t za_start;
- hrtime_t za_stop;
- hrtime_t za_kill;
- /*
- * Thread-local variables can go here to aid debugging.
- */
- ztest_block_tag_t za_rbt;
- ztest_block_tag_t za_wbt;
- dmu_object_info_t za_doi;
- dmu_buf_t *za_dbuf;
-} ztest_args_t;
-
-typedef void ztest_func_t(ztest_args_t *);
+typedef struct bufwad {
+ uint64_t bw_index;
+ uint64_t bw_txg;
+ uint64_t bw_data;
+} bufwad_t;
+
+/*
+ * XXX -- fix zfs range locks to be generic so we can use them here.
+ */
+typedef enum {
+ RL_READER,
+ RL_WRITER,
+ RL_APPEND
+} rl_type_t;
+
+typedef struct rll {
+ void *rll_writer;
+ int rll_readers;
+ mutex_t rll_lock;
+ cond_t rll_cv;
+} rll_t;
+
+typedef struct rl {
+ uint64_t rl_object;
+ uint64_t rl_offset;
+ uint64_t rl_size;
+ rll_t *rl_lock;
+} rl_t;
+
+#define ZTEST_RANGE_LOCKS 64
+#define ZTEST_OBJECT_LOCKS 64
+
+/*
+ * Object descriptor. Used as a template for object lookup/create/remove.
+ */
+typedef struct ztest_od {
+ uint64_t od_dir;
+ uint64_t od_object;
+ dmu_object_type_t od_type;
+ dmu_object_type_t od_crtype;
+ uint64_t od_blocksize;
+ uint64_t od_crblocksize;
+ uint64_t od_gen;
+ uint64_t od_crgen;
+ char od_name[MAXNAMELEN];
+} ztest_od_t;
+
+/*
+ * Per-dataset state.
+ */
+typedef struct ztest_ds {
+ objset_t *zd_os;
+ zilog_t *zd_zilog;
+ uint64_t zd_seq;
+ ztest_od_t *zd_od; /* debugging aid */
+ char zd_name[MAXNAMELEN];
+ mutex_t zd_dirobj_lock;
+ rll_t zd_object_lock[ZTEST_OBJECT_LOCKS];
+ rll_t zd_range_lock[ZTEST_RANGE_LOCKS];
+} ztest_ds_t;
+
+/*
+ * Per-iteration state.
+ */
+typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
+
+typedef struct ztest_info {
+ ztest_func_t *zi_func; /* test function */
+ uint64_t zi_iters; /* iterations per execution */
+ uint64_t *zi_interval; /* execute every <interval> seconds */
+ uint64_t zi_call_count; /* per-pass count */
+ uint64_t zi_call_time; /* per-pass time */
+ uint64_t zi_call_next; /* next time to call this function */
+} ztest_info_t;
/*
* Note: these aren't static because we want dladdr() to work.
*/
ztest_func_t ztest_dmu_read_write;
-ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
-ztest_func_t ztest_fzap;
ztest_func_t ztest_zap_parallel;
-ztest_func_t ztest_traverse;
-ztest_func_t ztest_dsl_prop_get_set;
+ztest_func_t ztest_zil_commit;
+ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_objset_create_destroy;
+ztest_func_t ztest_dmu_prealloc;
+ztest_func_t ztest_fzap;
ztest_func_t ztest_dmu_snapshot_create_destroy;
-ztest_func_t ztest_dsl_dataset_promote_busy;
+ztest_func_t ztest_dsl_prop_get_set;
+ztest_func_t ztest_spa_prop_get_set;
ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
+ztest_func_t ztest_ddt_repair;
+ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_spa_rename;
+ztest_func_t ztest_scrub;
+ztest_func_t ztest_dsl_dataset_promote_busy;
ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
ztest_func_t ztest_vdev_aux_add_remove;
-ztest_func_t ztest_scrub;
+ztest_func_t ztest_split_pool;
-typedef struct ztest_info {
- ztest_func_t *zi_func; /* test function */
- uint64_t zi_iters; /* iterations per execution */
- uint64_t *zi_interval; /* execute every <interval> seconds */
- uint64_t zi_calls; /* per-pass count */
- uint64_t zi_call_time; /* per-pass time */
- uint64_t zi_call_total; /* cumulative total */
- uint64_t zi_call_target; /* target cumulative total */
-} ztest_info_t;
-
-uint64_t zopt_always = 0; /* all the time */
-uint64_t zopt_often = 1; /* every second */
-uint64_t zopt_sometimes = 10; /* every 10 seconds */
-uint64_t zopt_rarely = 60; /* every 60 seconds */
+uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
+uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
+uint64_t zopt_often = 1ULL * NANOSEC; /* every second */
+uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */
+uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */
ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
- { ztest_dmu_read_write_zcopy, 1, &zopt_always },
- { ztest_dmu_write_parallel, 30, &zopt_always },
+ { ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
- { ztest_fzap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
- { ztest_dsl_prop_get_set, 1, &zopt_sometimes },
- { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
- { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
- { ztest_spa_create_destroy, 1, &zopt_sometimes },
+ { ztest_split_pool, 1, &zopt_always },
+ { ztest_zil_commit, 1, &zopt_incessant },
+ { ztest_dmu_read_write_zcopy, 1, &zopt_often },
+ { ztest_dmu_objset_create_destroy, 1, &zopt_often },
+ { ztest_dsl_prop_get_set, 1, &zopt_often },
+ { ztest_spa_prop_get_set, 1, &zopt_sometimes },
+#if 0
+ { ztest_dmu_prealloc, 1, &zopt_sometimes },
+#endif
+ { ztest_fzap, 1, &zopt_sometimes },
+ { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
+ { ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
+ { ztest_ddt_repair, 1, &zopt_sometimes },
+ { ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
{ ztest_spa_rename, 1, &zopt_rarely },
- { ztest_vdev_attach_detach, 1, &zopt_rarely },
- { ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_scrub, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
- { ztest_vdev_add_remove, 1, &zopt_vdevtime },
+ { ztest_vdev_attach_detach, 1, &zopt_rarely },
+ { ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_vdev_add_remove, 1, &zopt_vdevtime },
{ ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
- { ztest_scrub, 1, &zopt_vdevtime },
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
-#define ZTEST_SYNC_LOCKS 16
+/*
+ * The following struct is used to hold a list of uncalled commit callbacks.
+ * The callbacks are ordered by txg number.
+ */
+typedef struct ztest_cb_list {
+ /* presumably serializes access to zcl_callbacks -- confirm at the users */
+ mutex_t zcl_callbacks_lock;
+ /* the list of not-yet-called commit callbacks described above */
+ list_t zcl_callbacks;
+} ztest_cb_list_t;
/*
* Stuff we need to share writably between parent and child.
*/
typedef struct ztest_shared {
- mutex_t zs_vdev_lock;
- rwlock_t zs_name_lock;
- uint64_t zs_vdev_primaries;
- uint64_t zs_vdev_aux;
+ char *zs_pool;
+ /* pool handle; read by ztest_kill() and ztest_spa_prop_set_uint64() */
+ spa_t *zs_spa;
+ hrtime_t zs_proc_start;
+ hrtime_t zs_proc_stop;
+ hrtime_t zs_thread_start;
+ hrtime_t zs_thread_stop;
+ hrtime_t zs_thread_kill;
+ /* incremented by ztest_record_enospc() */
uint64_t zs_enospc_count;
- hrtime_t zs_start_time;
- hrtime_t zs_stop_time;
+ /* next index make_vdev_file() uses when naming a non-aux vdev file */
+ uint64_t zs_vdev_next_leaf;
+ uint64_t zs_vdev_aux;
+ /* zs_alloc and zs_space are filled in by ztest_kill() just before SIGKILL */
uint64_t zs_alloc;
uint64_t zs_space;
+ mutex_t zs_vdev_lock;
+ rwlock_t zs_name_lock;
ztest_info_t zs_info[ZTEST_FUNCS];
- mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
- uint64_t zs_seq[ZTEST_SYNC_LOCKS];
+ uint64_t zs_splits;
+ uint64_t zs_mirrors;
+ /* flexible array member: has to stay the last member of the struct */
+ ztest_ds_t zs_zd[];
} ztest_shared_t;
+/*
+ * All-ones unsigned long long constant.
+ * NOTE(review): the replacement list is not parenthesized; the usual
+ * convention would be (-1ULL).  How the id is used is outside this view.
+ */
+#define ID_PARALLEL -1ULL
+
static char ztest_dev_template[] = "%s/%s.%llua";
static char ztest_aux_template[] = "%s/%s.%s.%llu";
-static ztest_shared_t *ztest_shared;
+ztest_shared_t *ztest_shared;
+uint64_t *ztest_seq;
static int ztest_random_fd;
static int ztest_dump_core = 1;
-static uint64_t metaslab_sz;
static boolean_t ztest_exiting;
+/* Global commit callback list */
+static ztest_cb_list_t zcl;
+
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
+static uint64_t metaslab_sz;
-#define ZTEST_DIROBJ 1
-#define ZTEST_MICROZAP_OBJ 2
-#define ZTEST_FATZAP_OBJ 3
-
-#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10)
-#define ZTEST_DIRSIZE 256
+/*
+ * Fixed object numbers: ZTEST_META_DNODE is 0, ZTEST_DIROBJ is 1, and
+ * ZTEST_OBJECTS (2) is the number of enumerators that precede it.
+ */
+enum ztest_object {
+ ZTEST_META_DNODE = 0,
+ ZTEST_DIROBJ,
+ ZTEST_OBJECTS
+};
static void usage(boolean_t) __NORETURN;
@@ -381,21 +470,22 @@ usage(boolean_t requested)
(void) fprintf(fp, "Usage: %s\n"
"\t[-v vdevs (default: %llu)]\n"
"\t[-s size_of_each_vdev (default: %s)]\n"
- "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
+ "\t[-a alignment_shift (default: %d)] use 0 for random\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
"\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
- "\t[-i initialize pool i times (default: %d)]\n"
- "\t[-k kill percentage (default: %llu%%)]\n"
+ "\t[-i init_count (default: %d)] initialize pool i times\n"
+ "\t[-k kill_percentage (default: %llu%%)]\n"
"\t[-p pool_name (default: %s)]\n"
- "\t[-f file directory for vdev files (default: %s)]\n"
- "\t[-V(erbose)] (use multiple times for ever more blather)\n"
- "\t[-E(xisting)] (use existing pool instead of creating new one)\n"
- "\t[-T time] total run time (default: %llu sec)\n"
- "\t[-P passtime] time per pass (default: %llu sec)\n"
+ "\t[-f dir (default: %s)] file directory for vdev files\n"
+ "\t[-V] verbose (use multiple times for ever more blather)\n"
+ "\t[-E] use existing pool instead of creating new one\n"
+ "\t[-T time (default: %llu sec)] total run time\n"
+ "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
+ "\t[-P passtime (default: %llu sec)] time per pass\n"
"\t[-h] (print help)\n"
"",
cmdname,
@@ -413,31 +503,11 @@ usage(boolean_t requested)
zopt_pool, /* -p */
zopt_dir, /* -f */
(u_longlong_t)zopt_time, /* -T */
+ (u_longlong_t)zopt_maxloops, /* -F */
(u_longlong_t)zopt_passtime); /* -P */
exit(requested ? 0 : 1);
}
-static uint64_t
-ztest_random(uint64_t range)
-{
- uint64_t r;
-
- if (range == 0)
- return (0);
-
- if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
- fatal(1, "short read from /dev/urandom");
-
- return (r % range);
-}
-
-/* ARGSUSED */
-static void
-ztest_record_enospc(char *s)
-{
- ztest_shared->zs_enospc_count++;
-}
-
static void
process_options(int argc, char **argv)
{
@@ -451,7 +521,7 @@ process_options(int argc, char **argv)
metaslab_gang_bang = 32 << 10;
while ((opt = getopt(argc, argv,
- "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:h")) != EOF) {
+ "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:")) != EOF) {
value = 0;
switch (opt) {
case 'v':
@@ -467,6 +537,7 @@ process_options(int argc, char **argv)
case 'k':
case 'T':
case 'P':
+ case 'F':
value = nicenumtoull(optarg);
}
switch (opt) {
@@ -486,7 +557,7 @@ process_options(int argc, char **argv)
zopt_raidz = MAX(1, value);
break;
case 'R':
- zopt_raidz_parity = MIN(MAX(value, 1), 2);
+ zopt_raidz_parity = MIN(MAX(value, 1), 3);
break;
case 'd':
zopt_datasets = MAX(1, value);
@@ -521,6 +592,9 @@ process_options(int argc, char **argv)
case 'P':
zopt_passtime = MAX(1, value);
break;
+ case 'F':
+ zopt_maxloops = MAX(1, value);
+ break;
case 'h':
usage(B_TRUE);
break;
@@ -533,8 +607,37 @@ process_options(int argc, char **argv)
zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
- zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
- zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
+ zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time * NANOSEC / zopt_vdevs :
+ UINT64_MAX >> 2);
+}
+
+/*
+ * Store the results of metaslab_class_get_alloc() and
+ * metaslab_class_get_space() for the pool's normal class in zs, then send
+ * SIGKILL to the calling process.
+ */
+static void
+ztest_kill(ztest_shared_t *zs)
+{
+ zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(zs->zs_spa));
+ zs->zs_space = metaslab_class_get_space(spa_normal_class(zs->zs_spa));
+ (void) kill(getpid(), SIGKILL);
+}
+
+/*
+ * Return r % range, where r is 64 bits read from ztest_random_fd, so the
+ * result lies in [0, range - 1].  A range of 0 returns 0 without reading.
+ * A read that does not deliver all eight bytes is fatal.
+ */
+static uint64_t
+ztest_random(uint64_t range)
+{
+ uint64_t r;
+
+ if (range == 0)
+ return (0);
+
+ if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
+ fatal(1, "short read from /dev/urandom");
+
+ return (r % range);
+}
+
+/*
+ * Count one more ENOSPC occurrence in the shared state.  The string argument
+ * is accepted but not used.
+ */
+/* ARGSUSED */
+static void
+ztest_record_enospc(const char *s)
+{
+ ztest_shared->zs_enospc_count++;
}
static uint64_t
@@ -563,7 +666,7 @@ make_vdev_file(char *path, char *aux, size_t size, uint64_t ashift)
(void) sprintf(path, ztest_aux_template,
zopt_dir, zopt_pool, aux, vdev);
} else {
- vdev = ztest_shared->zs_vdev_primaries++;
+ vdev = ztest_shared->zs_vdev_next_leaf++;
(void) sprintf(path, ztest_dev_template,
zopt_dir, zopt_pool, vdev);
}
@@ -674,100 +777,807 @@ make_vdev_root(char *path, char *aux, size_t size, uint64_t ashift,
return (root);
}
+/*
+ * Return a power-of-two block size, 1 << s, with s picked by ztest_random()
+ * from SPA_MINBLOCKSHIFT through SPA_MAXBLOCKSHIFT inclusive.
+ */
+static int
+ztest_random_blocksize(void)
+{
+ return (1 << (SPA_MINBLOCKSHIFT +
+ ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)));
+}
+
+/*
+ * Return a shift picked by ztest_random() from DN_MIN_INDBLKSHIFT through
+ * DN_MAX_INDBLKSHIFT inclusive.
+ */
+static int
+ztest_random_ibshift(void)
+{
+ return (DN_MIN_INDBLKSHIFT +
+ ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
+}
+
+/*
+ * Return the index of a randomly chosen child of the root vdev.  Candidates
+ * are redrawn while the child is a hole, is a log device and log_ok is false,
+ * has no metaslab group, or its group has no class.  The function asserts
+ * that spa_config_held(spa, SCL_ALL, RW_READER) is non-zero on entry.
+ */
+static uint64_t
+ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
+{
+ uint64_t top;
+ vdev_t *rvd = spa->spa_root_vdev;
+ vdev_t *tvd;
+
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+
+ do {
+ top = ztest_random(rvd->vdev_children);
+ tvd = rvd->vdev_child[top];
+ } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
+ tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
+
+ return (top);
+}
+
+/*
+ * Return a value from zfs_prop_random_value() for prop.  For
+ * ZFS_PROP_CHECKSUM the draw is repeated until the value is something other
+ * than ZIO_CHECKSUM_OFF.
+ */
+static uint64_t
+ztest_random_dsl_prop(zfs_prop_t prop)
+{
+ uint64_t value;
+
+ do {
+ value = zfs_prop_random_value(prop, ztest_random(-1ULL));
+ } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
+
+ return (value);
+}
+
+/*
+ * Set the uint64 property prop on dataset osname through dsl_prop_set(),
+ * passing ZPROP_SRC_NONE when inherit is true and ZPROP_SRC_LOCAL otherwise.
+ * ENOSPC is recorded and returned; any other error is asserted to be 0.
+ * After a successful set the property is read back with dsl_prop_get(), and
+ * at zopt_verbose >= 6 the current value and its setpoint are printed.
+ */
+static int
+ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
+ boolean_t inherit)
+{
+ const char *propname = zfs_prop_to_name(prop);
+ const char *valname;
+ char setpoint[MAXPATHLEN];
+ uint64_t curval;
+ int error;
+
+ error = dsl_prop_set(osname, propname,
+ (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
+ sizeof (value), 1, &value);
+
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (error);
+ }
+ ASSERT3U(error, ==, 0);
+
+ VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
+ 1, &curval, setpoint), ==, 0);
+
+ /* valname is only assigned, and only read, inside this branch */
+ if (zopt_verbose >= 6) {
+ VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
+ (void) printf("%s %s = %s at '%s'\n",
+ osname, propname, valname, setpoint);
+ }
+
+ return (error);
+}
+
+/*
+ * Set the uint64 pool property prop on zs->zs_spa.  A one-entry nvlist is
+ * built, handed to spa_prop_set() and freed again.  ENOSPC is recorded and
+ * returned; any other error is asserted to be 0.
+ */
+static int
+ztest_spa_prop_set_uint64(ztest_shared_t *zs, zpool_prop_t prop, uint64_t value)
+{
+ spa_t *spa = zs->zs_spa;
+ nvlist_t *props = NULL;
+ int error;
+
+ VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
+
+ error = spa_prop_set(spa, props);
+
+ nvlist_free(props);
+
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (error);
+ }
+ ASSERT3U(error, ==, 0);
+
+ return (error);
+}
+
+/*
+ * Put rll into the unlocked state (no writer, zero readers) and initialize
+ * its mutex and condition variable.
+ */
+static void
+ztest_rll_init(rll_t *rll)
+{
+ rll->rll_writer = NULL;
+ rll->rll_readers = 0;
+ VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
+ VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
+}
+
+/*
+ * Destroy the mutex and condition variable of rll.  The lock is asserted to
+ * have neither a writer nor any readers at this point.
+ */
static void
-ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
+ztest_rll_destroy(rll_t *rll)
+{
+ ASSERT(rll->rll_writer == NULL);
+ ASSERT(rll->rll_readers == 0);
+ VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
+ VERIFY(cond_destroy(&rll->rll_cv) == 0);
+}
+
+/*
+ * Acquire rll.  With type RL_READER the caller waits until no writer is
+ * recorded and then increments the reader count.  With any other type the
+ * caller waits until there is neither a writer nor a reader and then records
+ * curthread as the writer.  Waiting happens on rll_cv under rll_lock.
+ */
+static void
+ztest_rll_lock(rll_t *rll, rl_type_t type)
+{
+ VERIFY(mutex_lock(&rll->rll_lock) == 0);
+
+ if (type == RL_READER) {
+ while (rll->rll_writer != NULL)
+ (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ rll->rll_readers++;
+ } else {
+ while (rll->rll_writer != NULL || rll->rll_readers)
+ (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+ rll->rll_writer = curthread;
+ }
+
+ VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+}
+
+/*
+ * Release rll.  If a writer is recorded it is cleared; otherwise the reader
+ * count is decremented.  Once the lock has neither writer nor readers, every
+ * thread waiting on rll_cv is woken.
+ */
+static void
+ztest_rll_unlock(rll_t *rll)
+{
+ VERIFY(mutex_lock(&rll->rll_lock) == 0);
+
+ if (rll->rll_writer) {
+ ASSERT(rll->rll_readers == 0);
+ rll->rll_writer = NULL;
+ } else {
+ ASSERT(rll->rll_readers != 0);
+ ASSERT(rll->rll_writer == NULL);
+ rll->rll_readers--;
+ }
+
+ if (rll->rll_writer == NULL && rll->rll_readers == 0)
+ VERIFY(cond_broadcast(&rll->rll_cv) == 0);
+
+ VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+}
+
+/*
+ * Lock the zd_object_lock entry selected by
+ * object & (ZTEST_OBJECT_LOCKS - 1), as reader or writer according to type.
+ * Different objects can therefore share one lock.
+ * NOTE(review): the mask presumably relies on ZTEST_OBJECT_LOCKS being a
+ * power of two -- its definition is outside this view.
+ */
+static void
+ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
+{
+ rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
+
+ ztest_rll_lock(rll, type);
+}
+
+/*
+ * Release the zd_object_lock entry selected by
+ * object & (ZTEST_OBJECT_LOCKS - 1), the same selection ztest_object_lock()
+ * makes.
+ */
+static void
+ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
+{
+ rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
+
+ ztest_rll_unlock(rll);
+}
+
+/*
+ * Allocate an rl_t describing (object, offset, size) and lock the
+ * zd_range_lock entry chosen by hashing object and offset.  The object,
+ * offset and size are only recorded in the rl_t; the lock that is actually
+ * taken is the whole hashed entry, so unrelated ranges may contend.
+ * The returned rl_t is released with ztest_range_unlock().
+ */
+static rl_t *
+ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
+ uint64_t size, rl_type_t type)
+{
+ uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
+ rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
+ rl_t *rl;
+
+ rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
+ rl->rl_object = object;
+ rl->rl_offset = offset;
+ rl->rl_size = size;
+ rl->rl_lock = rll;
+
+ ztest_rll_lock(rll, type);
+
+ return (rl);
+}
+
+/*
+ * Release the lock recorded in rl->rl_lock and free rl.  rl must not be used
+ * after this call.
+ */
+static void
+ztest_range_unlock(rl_t *rl)
+{
+ rll_t *rll = rl->rl_lock;
+
+ ztest_rll_unlock(rll);
+
+ umem_free(rl, sizeof (*rl));
+}
+
+/*
+ * Initialize zd for objset os: remember os and its ZIL (dmu_objset_zil()),
+ * reset zd_seq to 0, copy the objset name into zd_name, and initialize the
+ * directory mutex and every object and range lock.
+ */
+static void
+ztest_zd_init(ztest_ds_t *zd, objset_t *os)
+{
+ zd->zd_os = os;
+ zd->zd_zilog = dmu_objset_zil(os);
+ zd->zd_seq = 0;
+ dmu_objset_name(os, zd->zd_name);
+
+ VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
+
+ for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
+ ztest_rll_init(&zd->zd_object_lock[l]);
+
+ for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
+ ztest_rll_init(&zd->zd_range_lock[l]);
+}
+
+/*
+ * Destroy the directory mutex and every object and range lock of zd.
+ * ztest_rll_destroy() asserts that each lock is unheld.
+ */
+static void
+ztest_zd_fini(ztest_ds_t *zd)
+{
+ VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
+
+ for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
+ ztest_rll_destroy(&zd->zd_object_lock[l]);
+
+ for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
+ ztest_rll_destroy(&zd->zd_range_lock[l]);
+}
+
+/*
+ * Evaluates to TXG_NOWAIT when ztest_random(10) returns 0 and to TXG_WAIT
+ * otherwise.  Every expansion makes its own ztest_random() call.
+ */
+#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
+
+/*
+ * Assign tx with dmu_tx_assign(tx, txg_how).  On success the non-zero txg of
+ * the transaction is returned and the caller still owns tx.  On failure tx
+ * is aborted here and 0 is returned, so the caller must not touch tx again:
+ * for ERESTART (asserted to occur only with TXG_NOWAIT) dmu_tx_wait() is
+ * called first; any other error is asserted to be ENOSPC and is recorded
+ * with tag.
+ */
+static uint64_t
+ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
{
- int bs = SPA_MINBLOCKSHIFT +
- ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
- int ibs = DN_MIN_INDBLKSHIFT +
- ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
+ uint64_t txg;
int error;
- error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
+ /*
+ * Attempt to assign tx to some transaction group.
+ */
+ error = dmu_tx_assign(tx, txg_how);
if (error) {
- char osname[300];
- dmu_objset_name(os, osname);
- fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
- osname, object, 1 << bs, ibs, error);
+ if (error == ERESTART) {
+ ASSERT(txg_how == TXG_NOWAIT);
+ dmu_tx_wait(tx);
+ } else {
+ ASSERT3U(error, ==, ENOSPC);
+ ztest_record_enospc(tag);
+ }
+ dmu_tx_abort(tx);
+ return (0);
}
+ txg = dmu_tx_get_txg(tx);
+ ASSERT(txg != 0);
+ return (txg);
+}
+
+/*
+ * Fill buf with copies of the 64-bit value, one uint64_t store at a time,
+ * while the store address is below buf + size.  If size is not a multiple of
+ * sizeof (uint64_t) the last store reaches past buf + size, so callers are
+ * expected to pass a multiple of eight bytes.
+ */
+static void
+ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
+{
+ uint64_t *ip = buf;
+ uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
+
+ while (ip < ip_end)
+ *ip++ = value;
}
-static uint8_t
-ztest_random_checksum(void)
+/*
+ * Return true when every uint64_t word in buf, up to buf + size, equals
+ * value.  The whole buffer is always scanned: the differences are OR-ed
+ * together and tested once at the end.
+ */
+static boolean_t
+ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
{
- uint8_t checksum;
+ uint64_t *ip = buf;
+ uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
+ uint64_t diff = 0;
- do {
- checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
- } while (zio_checksum_table[checksum].ci_zbt);
+ while (ip < ip_end)
+ diff |= (value - *ip++);
+
+ return (diff == 0);
+}
+
+/*
+ * Fill in the block tag bt: BT_MAGIC, the objset id of os, and the given
+ * object, offset, generation, txg and creation txg.
+ */
+static void
+ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{
+ bt->bt_magic = BT_MAGIC;
+ bt->bt_objset = dmu_objset_id(os);
+ bt->bt_object = object;
+ bt->bt_offset = offset;
+ bt->bt_gen = gen;
+ bt->bt_txg = txg;
+ bt->bt_crtxg = crtxg;
+}
+
+/*
+ * Assert that the block tag bt is consistent with the arguments: magic,
+ * objset id, object, offset and creation txg must match exactly, while the
+ * tag's generation and txg may be at most gen and txg.  All checks are
+ * ASSERTs.
+ */
+static void
+ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+{
+ ASSERT(bt->bt_magic == BT_MAGIC);
+ ASSERT(bt->bt_objset == dmu_objset_id(os));
+ ASSERT(bt->bt_object == object);
+ ASSERT(bt->bt_offset == offset);
+ ASSERT(bt->bt_gen <= gen);
+ ASSERT(bt->bt_txg <= txg);
+ ASSERT(bt->bt_crtxg == crtxg);
+}
+
+/*
+ * Return a pointer to the block tag stored in the last sizeof (*bt) bytes of
+ * the bonus data of db, i.e. at db_data + doi_bonus_size - sizeof (*bt).
+ * The bonus size is asserted to fit in db and to be large enough for a tag.
+ */
+static ztest_block_tag_t *
+ztest_bt_bonus(dmu_buf_t *db)
+{
+ dmu_object_info_t doi;
+ ztest_block_tag_t *bt;
+
+ dmu_object_info_from_db(db, &doi);
+ ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
+ ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
+ bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
+
+ return (bt);
+}
+
+/*
+ * ZIL logging ops
+ */
+
+/*
+ * Alternative names for existing log record members; ztest_replay_create()
+ * reads them from an lr_create_t as object type, block size, indirect block
+ * shift, bonus type and bonus length.
+ */
+#define lrz_type lr_mode
+#define lrz_blocksize lr_uid
+#define lrz_ibshift lr_gid
+#define lrz_bonustype lr_rdev
+#define lrz_bonuslen lr_crtime[1]
+
+/*
+ * Log a TX_CREATE record for lr, whose NUL-terminated name directly follows
+ * the record.  Nothing is logged while the ZIL is being replayed.
+ */
+static void
+ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ size_t namesize = strlen(name) + 1;
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize);
+ /* copy everything after the common lr_t header, including the name */
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) + namesize - sizeof (lr_t));
+
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_REMOVE record for lr, whose NUL-terminated name directly follows
+ * the record, and store object in the itx's itx_oid.  Nothing is logged
+ * while the ZIL is being replayed.
+ */
+static void
+ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ size_t namesize = strlen(name) + 1;
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize);
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) + namesize - sizeof (lr_t));
- if (checksum == ZIO_CHECKSUM_OFF)
- checksum = ZIO_CHECKSUM_ON;
+ itx->itx_oid = object;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
+
+/*
+ * Log a TX_WRITE record for lr using a randomly chosen write state.
+ * Nothing is logged while the ZIL is being replayed.
+ *
+ * A length above ZIL_MAX_LOG_DATA forces WR_INDIRECT.  For WR_COPIED the
+ * data is read with dmu_read() into the space right behind the record; if
+ * that read fails, the itx is replaced by a bare record in state
+ * WR_NEED_COPY.  itx_sync is set for roughly one write in eight.
+ */
+static void
+ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
+{
+ itx_t *itx;
+ itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ if (lr->lr_length > ZIL_MAX_LOG_DATA)
+ write_state = WR_INDIRECT;
+
+ itx = zil_itx_create(TX_WRITE,
+ sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0));
+
+ if (write_state == WR_COPIED &&
+ dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
+ ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
+ zil_itx_destroy(itx);
+ itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+ write_state = WR_NEED_COPY;
+ }
+ itx->itx_private = zd;
+ itx->itx_wr_state = write_state;
+ itx->itx_sync = (ztest_random(8) == 0);
+ itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0);
+
+ /* copy the record body that follows the common lr_t header */
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ zil_itx_assign(zd->zd_zilog, itx, tx);
+}
- return (checksum);
+/*
+ * Log a TX_TRUNCATE record for lr with itx_sync cleared.  Nothing is logged
+ * while the ZIL is being replayed.
+ */
+static void
+ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
+{
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ itx->itx_sync = B_FALSE;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
}
-static uint8_t
-ztest_random_compress(void)
+/*
+ * Log a TX_SETATTR record for lr with itx_sync cleared.  Nothing is logged
+ * while the ZIL is being replayed.
+ */
+static void
+ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
{
- return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
+ itx_t *itx;
+
+ if (zil_replaying(zd->zd_zilog, tx))
+ return;
+
+ itx = zil_itx_create(TX_SETATTR, sizeof (*lr));
+ bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
+ sizeof (*lr) - sizeof (lr_t));
+
+ itx->itx_sync = B_FALSE;
+ zil_itx_assign(zd->zd_zilog, itx, tx);
}
+/*
+ * ZIL replay ops
+ */
+/*
+ * Create the object described by lr and enter it under its name (which
+ * follows the record) in the directory ZAP lr_doid.
+ *
+ * When lr_foid is 0 a new object is allocated and its number is stored back
+ * into lr_foid; otherwise the given object number is claimed.  The function
+ * asserts that lr_foid is non-zero exactly when the ZIL is replaying.  The
+ * new object gets a block tag in its bonus buffer, and the operation is
+ * logged through ztest_log_create().
+ *
+ * Returns 0 on success, ENOSPC when the transaction cannot be assigned, or
+ * the claim error (asserted to be EEXIST, and only during replay).
+ */
static int
-ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
+ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap)
{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ objset_t *os = zd->zd_os;
+ ztest_block_tag_t *bbt;
+ dmu_buf_t *db;
dmu_tx_t *tx;
- int error;
+ uint64_t txg;
+ int error = 0;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
+ ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+ ASSERT(name[0] != '\0');
+
tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
+
+ dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name);
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
+ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+ } else {
+ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+ }
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0)
+ return (ENOSPC);
+
+ ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
+
+ if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
+ if (lr->lr_foid == 0) {
+ lr->lr_foid = zap_create(os,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else {
+ error = zap_create_claim(os, lr->lr_foid,
+ lr->lrz_type, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ } else {
+ if (lr->lr_foid == 0) {
+ lr->lr_foid = dmu_object_alloc(os,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ } else {
+ error = dmu_object_claim(os, lr->lr_foid,
+ lr->lrz_type, 0, lr->lrz_bonustype,
+ lr->lrz_bonuslen, tx);
+ }
+ }
+
if (error) {
- dmu_tx_abort(tx);
+ ASSERT3U(error, ==, EEXIST);
+ ASSERT(zd->zd_zilog->zl_replay);
+ dmu_tx_commit(tx);
return (error);
}
- error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
- DMU_OT_NONE, 0, tx);
- ASSERT3U(error, ==, 0);
+ ASSERT(lr->lr_foid != 0);
+
+ if (lr->lrz_type != DMU_OT_ZAP_OTHER)
+ VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
+ lr->lrz_blocksize, lr->lrz_ibshift, tx));
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+ bbt = ztest_bt_bonus(db);
+ dmu_buf_will_dirty(db, tx);
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
+ dmu_buf_rele(db, FTAG);
+
+ VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
+ &lr->lr_foid, tx));
+
+ (void) ztest_log_create(zd, tx, lr);
+
dmu_tx_commit(tx);
- if (zopt_verbose >= 5) {
- char osname[MAXNAMELEN];
- dmu_objset_name(os, osname);
- (void) printf("replay create of %s object %llu"
- " in txg %llu = %d\n",
- osname, (u_longlong_t)lr->lr_doid,
- (u_longlong_t)dmu_tx_get_txg(tx), error);
+ return (0);
+}
+
+/*
+ * Remove the object that the name following lr refers to in the directory
+ * ZAP lr_doid.  The name is looked up, the object is write-locked, then it
+ * is destroyed (zap_destroy() for DMU_OT_ZAP_OTHER, dmu_object_free()
+ * otherwise), its directory entry is removed and the operation is logged.
+ *
+ * Returns 0 on success, or ENOSPC when the transaction cannot be assigned.
+ */
+static int
+ztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap)
+{
+ char *name = (void *)(lr + 1); /* name follows lr */
+ objset_t *os = zd->zd_os;
+ dmu_object_info_t doi;
+ dmu_tx_t *tx;
+ uint64_t object, txg;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ASSERT(lr->lr_doid == ZTEST_DIROBJ);
+ ASSERT(name[0] != '\0');
+
+ VERIFY3U(0, ==,
+ zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
+ ASSERT(object != 0);
+
+ ztest_object_lock(zd, object, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
+ dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ ztest_object_unlock(zd, object);
+ return (ENOSPC);
}
- return (error);
+ if (doi.doi_type == DMU_OT_ZAP_OTHER) {
+ VERIFY3U(0, ==, zap_destroy(os, object, tx));
+ } else {
+ VERIFY3U(0, ==, dmu_object_free(os, object, tx));
+ }
+
+ VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
+
+ (void) ztest_log_remove(zd, tx, lr, object);
+
+ dmu_tx_commit(tx);
+
+ ztest_object_unlock(zd, object);
+
+ return (0);
}
+/*
+ * Write the data that follows lr to object lr_foid.
+ *
+ * If the record length equals sizeof (lr_write_t) and lr_length is smaller
+ * than the logical size of lr_blkptr, offset and length are widened to the
+ * whole block.  If the data starts with a block tag (BT_MAGIC, possibly
+ * byte-swapped) the tag is checked and regenerated from the bonus buffer's
+ * generation and creation txg before writing.  The object is read-locked
+ * and the range write-locked for the duration.  About one write in eight
+ * that covers exactly one aligned data block goes through a borrowed ARC
+ * buffer (dmu_request_arcbuf() / dmu_assign_arcbuf()) instead of
+ * dmu_write().
+ *
+ * Returns 0 on success, or ENOSPC when the transaction cannot be assigned;
+ * in that case every hold and lock taken here is released first.
+ */
static int
-ztest_replay_remove(objset_t *os, lr_remove_t *lr, boolean_t byteswap)
+ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
{
+ objset_t *os = zd->zd_os;
+ void *data = lr + 1; /* data follows lr */
+ uint64_t offset, length;
+ ztest_block_tag_t *bt = data;
+ ztest_block_tag_t *bbt;
+ uint64_t gen, txg, lrtxg, crtxg;
+ dmu_object_info_t doi;
dmu_tx_t *tx;
- int error;
+ dmu_buf_t *db;
+ arc_buf_t *abuf = NULL;
+ rl_t *rl;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+
+ /* If it's a dmu_sync() block, write the whole block */
+ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
+ if (length < blocksize) {
+ offset -= offset % blocksize;
+ length = blocksize;
+ }
+ }
+
+ if (bt->bt_magic == BSWAP_64(BT_MAGIC))
+ byteswap_uint64_array(bt, sizeof (*bt));
+
+ if (bt->bt_magic != BT_MAGIC)
+ bt = NULL;
+
+ ztest_object_lock(zd, lr->lr_foid, RL_READER);
+ rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ dmu_object_info_from_db(db, &doi);
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ gen = bbt->bt_gen;
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- return (error);
+
+ dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
+
+ if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
+ P2PHASE(offset, length) == 0)
+ abuf = dmu_request_arcbuf(db, length);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ if (abuf != NULL)
+ dmu_return_arcbuf(abuf);
+ dmu_buf_rele(db, FTAG);
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ if (bt != NULL) {
+ /*
+ * Usually, verify the old data before writing new data --
+ * but not always, because we also want to verify correct
+ * behavior when the data was not recently read into cache.
+ */
+ ASSERT(offset % doi.doi_data_block_size == 0);
+ if (ztest_random(4) != 0) {
+ int prefetch = ztest_random(2) ?
+ DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
+ ztest_block_tag_t rbt;
+
+ VERIFY(dmu_read(os, lr->lr_foid, offset,
+ sizeof (rbt), &rbt, prefetch) == 0);
+ if (rbt.bt_magic == BT_MAGIC) {
+ ztest_bt_verify(&rbt, os, lr->lr_foid,
+ offset, gen, txg, crtxg);
+ }
+ }
+
+ /*
+ * Writes can appear to be newer than the bonus buffer because
+ * the ztest_get_data() callback does a dmu_read() of the
+ * open-context data, which may be different than the data
+ * as it was when the write was generated.
+ */
+ if (zd->zd_zilog->zl_replay) {
+ ztest_bt_verify(bt, os, lr->lr_foid, offset,
+ MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
+ bt->bt_crtxg);
+ }
+
+ /*
+ * Set the bt's gen/txg to the bonus buffer's gen/txg
+ * so that all of the usual ASSERTs will work.
+ */
+ ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
}
- error = dmu_object_free(os, lr->lr_doid, tx);
+ if (abuf == NULL) {
+ dmu_write(os, lr->lr_foid, offset, length, data, tx);
+ } else {
+ bcopy(data, abuf->b_data, length);
+ dmu_assign_arcbuf(db, offset, abuf, tx);
+ }
+
+ (void) ztest_log_write(zd, tx, lr);
+
+ dmu_buf_rele(db, FTAG);
+
dmu_tx_commit(tx);
- return (error);
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+/*
+ * Free the range [lr_offset, lr_offset + lr_length) of object lr_foid and
+ * log the truncate.  The object is read-locked and the range write-locked
+ * while this happens.
+ *
+ * Returns 0 on success, or ENOSPC when the transaction cannot be assigned.
+ */
+static int
+ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ uint64_t txg;
+ rl_t *rl;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ztest_object_lock(zd, lr->lr_foid, RL_READER);
+ rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
+ RL_WRITER);
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
+ lr->lr_length, tx) == 0);
+
+ (void) ztest_log_truncate(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
+}
+
+/*
+ * Resize the bonus buffer of object lr_foid and rewrite the block tag at
+ * its end, holding the object write-locked.
+ *
+ * During ZIL replay the new bonus size and generation are taken from
+ * lr_size and lr_mode.  Otherwise a random size (a non-zero multiple of the
+ * tag size, at most db_size) and generation bt_gen + 1 are chosen and
+ * stored into lr, so that the logged record carries them.
+ *
+ * Returns 0 on success, or ENOSPC when the transaction cannot be assigned.
+ */
+static int
+ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ dmu_buf_t *db;
+ ztest_block_tag_t *bbt;
+ uint64_t txg, lrtxg, crtxg;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, lr->lr_foid);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, lr->lr_foid);
+ return (ENOSPC);
+ }
+
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ crtxg = bbt->bt_crtxg;
+ lrtxg = lr->lr_common.lrc_txg;
+
+ if (zd->zd_zilog->zl_replay) {
+ ASSERT(lr->lr_size != 0);
+ ASSERT(lr->lr_mode != 0);
+ ASSERT(lrtxg != 0);
+ } else {
+ /*
+ * Randomly change the size and increment the generation.
+ */
+ lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
+ sizeof (*bbt);
+ lr->lr_mode = bbt->bt_gen + 1;
+ ASSERT(lrtxg == 0);
+ }
+
+ /*
+ * Verify that the current bonus buffer is not newer than our txg.
+ */
+ ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
+ MAX(txg, lrtxg), crtxg);
+
+ dmu_buf_will_dirty(db, tx);
+
+ ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
+ ASSERT3U(lr->lr_size, <=, db->db_size);
+ VERIFY3U(dmu_set_bonus(db, lr->lr_size, tx), ==, 0);
+ /* the tag sits at the end of the bonus data, so find it again */
+ bbt = ztest_bt_bonus(db);
+
+ ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
+
+ dmu_buf_rele(db, FTAG);
+
+ (void) ztest_log_setattr(zd, tx, lr);
+
+ dmu_tx_commit(tx);
+
+ ztest_object_unlock(zd, lr->lr_foid);
+
+ return (0);
}
zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
@@ -780,9 +1590,9 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_RMDIR */
NULL, /* TX_LINK */
NULL, /* TX_RENAME */
- NULL, /* TX_WRITE */
- NULL, /* TX_TRUNCATE */
- NULL, /* TX_SETATTR */
+ ztest_replay_write, /* TX_WRITE */
+ ztest_replay_truncate, /* TX_TRUNCATE */
+ ztest_replay_setattr, /* TX_SETATTR */
NULL, /* TX_ACL */
NULL, /* TX_CREATE_ACL */
NULL, /* TX_CREATE_ATTR */
@@ -794,13 +1604,477 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
};
/*
+ * ZIL get_data callbacks
+ */
+
+/*
+ * Cleanup routine for a zgd_t set up by ztest_get_data(): drops the dbuf
+ * hold (if one was taken), the range lock and the object lock, adds the
+ * block pointer to the ZIL when 'error' is 0 and a bp was recorded, and
+ * finally frees the zgd itself.
+ */
+static void
+ztest_get_done(zgd_t *zgd, int error)
+{
+ ztest_ds_t *zd = zgd->zgd_private;
+ /* Capture the object number before the range lock it is read from is dropped. */
+ uint64_t object = zgd->zgd_rl->rl_object;
+
+ /* The dbuf hold, when present, was taken with the zgd as its tag. */
+ if (zgd->zgd_db)
+ dmu_buf_rele(zgd->zgd_db, zgd);
+
+ ztest_range_unlock(zgd->zgd_rl);
+ ztest_object_unlock(zd, object);
+
+ /* Only a successful request with a block pointer is added to the ZIL. */
+ if (error == 0 && zgd->zgd_bp)
+ zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
+
+ umem_free(zgd, sizeof (*zgd));
+}
+
+/*
+ * ZIL get_data callback: supply the data for the write record 'lr'.
+ *
+ * 'arg' is the ztest_ds_t of the dataset. When 'buf' is non-NULL the
+ * range described by lr_offset/lr_length is read into it (immediate
+ * write). Otherwise the dbuf of the data block containing lr_offset is
+ * held and passed to dmu_sync() together with &lr->lr_blkptr.
+ *
+ * Returns 0 on success, ENOENT when the creation txg in the object's
+ * bonus block tag is 0 or newer than the record's txg, or the error
+ * from dmu_bonus_hold(), dmu_read(), dmu_buf_hold() or dmu_sync().
+ */
+static int
+ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+{
+ ztest_ds_t *zd = arg;
+ objset_t *os = zd->zd_os;
+ uint64_t object = lr->lr_foid;
+ uint64_t offset = lr->lr_offset;
+ uint64_t size = lr->lr_length;
+ blkptr_t *bp = &lr->lr_blkptr;
+ uint64_t txg = lr->lr_common.lrc_txg;
+ uint64_t crtxg;
+ dmu_object_info_t doi;
+ dmu_buf_t *db;
+ zgd_t *zgd;
+ int error;
+
+ ztest_object_lock(zd, object, RL_READER);
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error) {
+ ztest_object_unlock(zd, object);
+ return (error);
+ }
+
+ crtxg = ztest_bt_bonus(db)->bt_crtxg;
+
+ /* Bail out if the tag has no creation txg or one newer than the record. */
+ if (crtxg == 0 || crtxg > txg) {
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, object);
+ return (ENOENT);
+ }
+
+ /* The bonus hold is only needed for the object info; db is reused below. */
+ dmu_object_info_from_db(db, &doi);
+ dmu_buf_rele(db, FTAG);
+ db = NULL;
+
+ zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
+ zgd->zgd_zilog = zd->zd_zilog;
+ zgd->zgd_private = zd;
+
+ if (buf != NULL) { /* immediate write */
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ error = dmu_read(os, object, offset, size, buf,
+ DMU_READ_NO_PREFETCH);
+ ASSERT(error == 0);
+ } else {
+ /*
+ * Work on the whole data block: align the offset down to the
+ * block size when that is a power of two, otherwise the block
+ * is expected to start at offset 0.
+ */
+ size = doi.doi_data_block_size;
+ if (ISP2(size)) {
+ offset = P2ALIGN(offset, size);
+ } else {
+ ASSERT(offset < size);
+ offset = 0;
+ }
+
+ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+ RL_READER);
+
+ /* Held with the zgd as tag; ztest_get_done() releases it with the same tag. */
+ error = dmu_buf_hold(os, object, offset, zgd, &db,
+ DMU_READ_NO_PREFETCH);
+
+ if (error == 0) {
+ zgd->zgd_db = db;
+ zgd->zgd_bp = bp;
+
+ ASSERT(db->db_offset == offset);
+ ASSERT(db->db_size == size);
+
+ error = dmu_sync(zio, lr->lr_common.lrc_txg,
+ ztest_get_done, zgd);
+
+ /*
+ * ztest_get_done was handed to dmu_sync() as its
+ * callback; presumably it performs the cleanup
+ * later on this path -- TODO confirm.
+ */
+ if (error == 0)
+ return (0);
+ }
+ }
+
+ /* Immediate write, or a failure above: clean up synchronously. */
+ ztest_get_done(zgd, error);
+
+ return (error);
+}
+
+/*
+ * Allocate a zeroed log record of 'lrsize' bytes. When 'name' is given,
+ * room for it (including the terminating NUL) is added after the record
+ * and the string is copied there.
+ */
+static void *
+ztest_lr_alloc(size_t lrsize, char *name)
+{
+	size_t namesize = 0;
+	char *lr;
+
+	if (name != NULL)
+		namesize = strlen(name) + 1;
+
+	lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
+
+	/* namesize is non-zero exactly when a name was supplied. */
+	if (namesize != 0)
+		bcopy(name, lr + lrsize, namesize);
+
+	return (lr);
+}
+
+/*
+ * Free a log record obtained from ztest_lr_alloc(). 'lrsize' and 'name'
+ * must match the allocation so that the same total size is released.
+ */
+void
+ztest_lr_free(void *lr, size_t lrsize, char *name)
+{
+	size_t total = lrsize;
+
+	if (name != NULL)
+		total += strlen(name) + 1;
+
+	umem_free(lr, total);
+}
+
+/*
+ * Look up 'count' object descriptors by name in their directory ZAP.
+ * For every object found, od_object is set and od_type, od_blocksize and
+ * od_gen are filled in from the object's info and bonus block tag; for
+ * every object not found, od_object is left 0.
+ * The caller must hold zd_dirobj_lock (asserted).
+ * Returns the number of objects not found.
+ */
+static int
+ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+ int error;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ for (int i = 0; i < count; i++, od++) {
+ od->od_object = 0;
+ error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
+ sizeof (uint64_t), 1, &od->od_object);
+ if (error) {
+ /* ENOENT is the only lookup failure expected here. */
+ ASSERT(error == ENOENT);
+ ASSERT(od->od_object == 0);
+ missing++;
+ } else {
+ dmu_buf_t *db;
+ ztest_block_tag_t *bbt;
+ dmu_object_info_t doi;
+
+ ASSERT(od->od_object != 0);
+ ASSERT(missing == 0); /* there should be no gaps */
+
+ /* Read type, block size and generation under the object lock. */
+ ztest_object_lock(zd, od->od_object, RL_READER);
+ VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
+ od->od_object, FTAG, &db));
+ dmu_object_info_from_db(db, &doi);
+ bbt = ztest_bt_bonus(db);
+ ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
+ od->od_type = doi.doi_type;
+ od->od_blocksize = doi.doi_data_block_size;
+ od->od_gen = bbt->bt_gen;
+ dmu_buf_rele(db, FTAG);
+ ztest_object_unlock(zd, od->od_object);
+ }
+ }
+
+ return (missing);
+}
+
+/*
+ * Create 'count' objects from the od templates by building an lr_create_t
+ * for each and passing it to ztest_replay_create(). After the first
+ * failure no further creations are attempted; the remaining entries get
+ * od_object = 0 and are counted as missing.
+ * The caller must hold zd_dirobj_lock (asserted).
+ * Returns the number of objects that were not created.
+ */
+static int
+ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ for (int i = 0; i < count; i++, od++) {
+ /* An earlier creation failed: skip the rest, counting each one. */
+ if (missing) {
+ od->od_object = 0;
+ missing++;
+ continue;
+ }
+
+ /* The record carries the object name after the fixed-size part. */
+ lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
+
+ lr->lr_doid = od->od_dir;
+ lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */
+ lr->lrz_type = od->od_crtype;
+ lr->lrz_blocksize = od->od_crblocksize;
+ lr->lrz_ibshift = ztest_random_ibshift();
+ lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
+ lr->lrz_bonuslen = dmu_bonus_max();
+ lr->lr_gen = od->od_crgen;
+ lr->lr_crtime[0] = time(NULL);
+
+ if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
+ ASSERT(missing == 0);
+ od->od_object = 0;
+ missing++;
+ } else {
+ /* lr_foid is read back as the object number that was created. */
+ od->od_object = lr->lr_foid;
+ od->od_type = od->od_crtype;
+ od->od_blocksize = od->od_crblocksize;
+ od->od_gen = od->od_crgen;
+ ASSERT(od->od_object != 0);
+ }
+
+ ztest_lr_free(lr, sizeof (*lr), od->od_name);
+ }
+
+ return (missing);
+}
+
+/*
+ * Remove the objects described by the 'count' od entries, walking the
+ * array from the last entry to the first. Entries whose od_object is 0
+ * are skipped. After the first failed removal no further removals are
+ * attempted and every remaining entry is counted as missing.
+ * The caller must hold zd_dirobj_lock (asserted).
+ * Returns the number of entries counted as missing.
+ */
+static int
+ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
+{
+ int missing = 0;
+ int error;
+
+ ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+
+ /* Start at the last descriptor and work backwards. */
+ od += count - 1;
+
+ for (int i = count - 1; i >= 0; i--, od--) {
+ if (missing) {
+ missing++;
+ continue;
+ }
+
+ if (od->od_object == 0)
+ continue;
+
+ lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
+
+ lr->lr_doid = od->od_dir;
+
+ if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
+ /* ENOSPC is the only removal failure expected here. */
+ ASSERT3U(error, ==, ENOSPC);
+ missing++;
+ } else {
+ od->od_object = 0;
+ }
+ ztest_lr_free(lr, sizeof (*lr), od->od_name);
+ }
+
+ return (missing);
+}
+
+/*
+ * Write 'size' bytes from 'data' to 'object' at 'offset' by building an
+ * lr_write_t with the payload appended and handing it to
+ * ztest_replay_write(). Returns that function's result.
+ */
+static int
+ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
+    void *data)
+{
+	size_t lrsize = sizeof (lr_write_t) + size;
+	lr_write_t *lr = ztest_lr_alloc(lrsize, NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_offset = offset;
+	lr->lr_length = size;
+	lr->lr_blkoff = 0;
+	BP_ZERO(&lr->lr_blkptr);
+
+	/* The payload lives directly behind the fixed-size record. */
+	bcopy(data, lr + 1, size);
+
+	error = ztest_replay_write(zd, lr, B_FALSE);
+	ztest_lr_free(lr, lrsize, NULL);
+
+	return (error);
+}
+
+/*
+ * Truncate the range [offset, offset + size) of 'object' by building an
+ * lr_truncate_t and handing it to ztest_replay_truncate(). Returns that
+ * function's result.
+ */
+static int
+ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+	lr_truncate_t *lr = ztest_lr_alloc(sizeof (*lr), NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_offset = offset;
+	lr->lr_length = size;
+
+	error = ztest_replay_truncate(zd, lr, B_FALSE);
+	ztest_lr_free(lr, sizeof (*lr), NULL);
+
+	return (error);
+}
+
+/*
+ * Issue a setattr on 'object' by building an lr_setattr_t with size and
+ * mode set to 0 and handing it to ztest_replay_setattr(). Returns that
+ * function's result.
+ */
+static int
+ztest_setattr(ztest_ds_t *zd, uint64_t object)
+{
+	lr_setattr_t *lr = ztest_lr_alloc(sizeof (*lr), NULL);
+	int error;
+
+	lr->lr_foid = object;
+	lr->lr_size = 0;
+	lr->lr_mode = 0;
+
+	error = ztest_replay_setattr(zd, lr, B_FALSE);
+	ztest_lr_free(lr, sizeof (*lr), NULL);
+
+	return (error);
+}
+
+/*
+ * Preallocate the range [offset, offset + size) of 'object' with
+ * dmu_prealloc() and wait for the assigned txg to sync. If no txg could
+ * be assigned, the range is freed instead (result ignored).
+ */
+static void
+ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
+{
+ objset_t *os = zd->zd_os;
+ dmu_tx_t *tx;
+ uint64_t txg;
+ rl_t *rl;
+
+ /* Wait for a sync before taking any locks. */
+ txg_wait_synced(dmu_objset_pool(os), 0);
+
+ ztest_object_lock(zd, object, RL_READER);
+ rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_write(tx, object, offset, size);
+
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+
+ /*
+ * A txg of 0 means the assignment failed; the tx is not touched
+ * again here, so presumably ztest_tx_assign() disposed of it --
+ * TODO confirm.
+ */
+ if (txg != 0) {
+ dmu_prealloc(os, object, offset, size, tx);
+ dmu_tx_commit(tx);
+ txg_wait_synced(dmu_objset_pool(os), txg);
+ } else {
+ (void) dmu_free_long_range(os, object, offset, size);
+ }
+
+ ztest_range_unlock(rl);
+ ztest_object_unlock(zd, object);
+}
+
+/*
+ * Perform one randomly chosen i/o operation (tag write, pattern write,
+ * zero write, truncate or setattr) on 'object' at 'offset'. Data writes
+ * and truncates cover one data block of the object; the tag write covers
+ * sizeof (ztest_block_tag_t). Results of the operations are ignored.
+ */
+static void
+ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
+{
+ ztest_block_tag_t wbt;
+ dmu_object_info_t doi;
+ enum ztest_io_type io_type;
+ uint64_t blocksize;
+ void *data;
+
+ VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
+ blocksize = doi.doi_data_block_size;
+ /* Scratch buffer of one block; allocated even for types that do not use it. */
+ data = umem_alloc(blocksize, UMEM_NOFAIL);
+
+ /*
+ * Pick an i/o type at random, biased toward writing block tags.
+ */
+ io_type = ztest_random(ZTEST_IO_TYPES);
+ /* Half of the time the random pick is overridden with a tag write. */
+ if (ztest_random(2) == 0)
+ io_type = ZTEST_IO_WRITE_TAG;
+
+ switch (io_type) {
+
+ case ZTEST_IO_WRITE_TAG:
+ ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
+ (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
+ break;
+
+ case ZTEST_IO_WRITE_PATTERN:
+ /* Fill byte is one of 'a'..'e', selected by object + offset. */
+ (void) memset(data, 'a' + (object + offset) % 5, blocksize);
+ if (ztest_random(2) == 0) {
+ /*
+ * Induce fletcher2 collisions to ensure that
+ * zio_ddt_collision() detects and resolves them
+ * when using fletcher2-verify for deduplication.
+ */
+ /* NOTE(review): touches 64-bit words 0 and 4, so assumes blocksize >= 40 bytes. */
+ ((uint64_t *)data)[0] ^= 1ULL << 63;
+ ((uint64_t *)data)[4] ^= 1ULL << 63;
+ }
+ (void) ztest_write(zd, object, offset, blocksize, data);
+ break;
+
+ case ZTEST_IO_WRITE_ZEROES:
+ bzero(data, blocksize);
+ (void) ztest_write(zd, object, offset, blocksize, data);
+ break;
+
+ case ZTEST_IO_TRUNCATE:
+ (void) ztest_truncate(zd, object, offset, blocksize);
+ break;
+
+ case ZTEST_IO_SETATTR:
+ (void) ztest_setattr(zd, object);
+ break;
+ }
+
+ umem_free(data, blocksize);
+}
+
+/*
+ * Initialize an object description template.
+ *
+ * The creation parameters (od_crtype, od_crblocksize, od_crgen) are taken
+ * from 'type', 'blocksize' and 'gen'; a 'blocksize' of 0 selects a random
+ * block size. The "current" fields (od_object, od_type, od_blocksize,
+ * od_gen) are reset. The object name is built from 'tag', 'id' (printed
+ * as a signed value) and 'index', truncated to fit od_name.
+ */
+static void
+ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
+    dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
+{
+	od->od_dir = ZTEST_DIROBJ;
+	od->od_object = 0;
+
+	od->od_crtype = type;
+	od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
+	od->od_crgen = gen;
+
+	od->od_type = DMU_OT_NONE;
+	od->od_blocksize = 0;
+	od->od_gen = 0;
+
+	/*
+	 * %lld/%llu require (unsigned) long long arguments; int64_t and
+	 * uint64_t need not be those types, so cast explicitly.
+	 */
+	(void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
+	    tag, (longlong_t)id, (u_longlong_t)index);
+}
+
+/*
+ * Lookup or create the objects for a test using the od template.
+ * If the objects do not all exist, or if 'remove' is specified,
+ * remove any existing objects and create new ones. Otherwise,
+ * use the existing objects.
+ *
+ * 'size' is the size in bytes of the od array. zd->zd_od is pointed at
+ * 'od' whether or not the setup succeeded.
+ * Returns 0 on success, or -1 if removal or creation left any object
+ * missing.
+ */
+static int
+ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
+{
+ int count = size / sizeof (*od);
+ int rv = 0;
+
+ VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
+ /*
+ * Short-circuit evaluation drives the work: remove runs only if the
+ * lookup found something missing or 'remove' is set, and create runs
+ * only if the remove left nothing behind.
+ */
+ if ((ztest_lookup(zd, od, count) != 0 || remove) &&
+ (ztest_remove(zd, od, count) != 0 ||
+ ztest_create(zd, od, count) != 0))
+ rv = -1;
+ zd->zd_od = od;
+ VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
+
+ return (rv);
+}
+
+/*
+ * Commit the dataset's ZIL and remember the resulting commit sequence
+ * number in zd->zd_seq. 'id' is unused.
+ */
+/* ARGSUSED */
+void
+ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
+{
+ zilog_t *zilog = zd->zd_zilog;
+
+ /* The second argument is a random value below ZTEST_OBJECTS. */
+ zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
+
+ /*
+ * Remember the committed values in zd, which is in parent/child
+ * shared memory. If we die, the next iteration of ztest_run()
+ * will verify that the log really does contain this record.
+ */
+ mutex_enter(&zilog->zl_lock);
+ /* The recorded sequence number must never move backwards. */
+ ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
+ zd->zd_seq = zilog->zl_commit_lr_seq;
+ mutex_exit(&zilog->zl_lock);
+}
+
+/*
* Verify that we can't destroy an active pool, create an existing pool,
* or create a pool with a bad vdev spec.
*/
+/* ARGSUSED */
void
-ztest_spa_create_destroy(ztest_args_t *za)
+ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
{
- int error;
+ ztest_shared_t *zs = ztest_shared;
spa_t *spa;
nvlist_t *nvroot;
@@ -808,41 +2082,31 @@ ztest_spa_create_destroy(ztest_args_t *za)
* Attempt to create using a bad file.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
- error = spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_file) = %d", error);
/*
* Attempt to create using a bad mirror.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1);
- error = spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL);
+ VERIFY3U(ENOENT, ==,
+ spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != ENOENT)
- fatal(0, "spa_create(bad_mirror) = %d", error);
/*
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
+ (void) rw_rdlock(&zs->zs_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
- error = spa_create(za->za_pool, nvroot, NULL, NULL, NULL);
+ VERIFY3U(EEXIST, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
- if (error != EEXIST)
- fatal(0, "spa_create(whatever) = %d", error);
-
- error = spa_open(za->za_pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
- error = spa_destroy(za->za_pool);
- if (error != EBUSY)
- fatal(0, "spa_destroy() = %d", error);
-
+ VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
+ VERIFY3U(EBUSY, ==, spa_destroy(zs->zs_pool));
spa_close(spa, FTAG);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+
+ (void) rw_unlock(&zs->zs_name_lock);
}
static vdev_t *
@@ -862,49 +2126,101 @@ vdev_lookup_by_path(vdev_t *vd, const char *path)
}
/*
+ * Find the first available hole which can be used as a top-level.
+ */
+int
+find_vdev_hole(spa_t *spa)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	int c = 0;
+
+	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
+
+	/*
+	 * Advance past every top-level child that is not a hole. The
+	 * result is the index of the first hole, or the number of
+	 * children when there is none.
+	 */
+	while (c < rvd->vdev_children && !rvd->vdev_child[c]->vdev_ishole)
+		c++;
+
+	return (c);
+}
+
+/*
* Verify that vdev_add() works as expected.
*/
+/* ARGSUSED */
void
-ztest_vdev_add_remove(ztest_args_t *za)
+ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ uint64_t leaves;
+ uint64_t guid;
nvlist_t *nvroot;
int error;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- ztest_shared->zs_vdev_primaries =
- spa->spa_root_vdev->vdev_children * leaves;
-
- spa_config_exit(spa, SCL_VDEV, FTAG);
+ ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
/*
- * Make 1/4 of the devices be log devices.
+ * If we have slogs then remove them 1/4 of the time.
*/
- nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
- ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
+ if (spa_has_slogs(spa) && ztest_random(4) == 0) {
+ /*
+ * Grab the guid from the head of the log class rotor.
+ */
+ guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
- error = spa_vdev_add(spa, nvroot);
- nvlist_free(nvroot);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ /*
+ * We have to grab the zs_name_lock as writer to
+ * prevent a race between removing a slog (dmu_objset_find)
+ * and destroying a dataset. Removing the slog will
+ * grab a reference on the dataset which may cause
+ * dmu_objset_destroy() to fail with EBUSY thus
+ * leaving the dataset in an inconsistent state.
+ */
+ VERIFY(rw_wrlock(&ztest_shared->zs_name_lock) == 0);
+ error = spa_vdev_remove(spa, guid, B_FALSE);
+ VERIFY(rw_unlock(&ztest_shared->zs_name_lock) == 0);
+
+ if (error && error != EEXIST)
+ fatal(0, "spa_vdev_remove() = %d", error);
+ } else {
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ /*
+ * Make 1/4 of the devices be log devices.
+ */
+ nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
+ ztest_random(4) == 0, zopt_raidz, zs->zs_mirrors, 1);
+
+ error = spa_vdev_add(spa, nvroot);
+ nvlist_free(nvroot);
+
+ if (error == ENOSPC)
+ ztest_record_enospc("spa_vdev_add");
+ else if (error != 0)
+ fatal(0, "spa_vdev_add() = %d", error);
+ }
- if (error == ENOSPC)
- ztest_record_enospc("spa_vdev_add");
- else if (error != 0)
- fatal(0, "spa_vdev_add() = %d", error);
+ VERIFY(mutex_unlock(&ztest_shared->zs_vdev_lock) == 0);
}
/*
* Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
*/
+/* ARGSUSED */
void
-ztest_vdev_aux_add_remove(ztest_args_t *za)
+ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
vdev_t *rvd = spa->spa_root_vdev;
spa_aux_vdev_t *sav;
char *aux;
@@ -919,7 +2235,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
aux = ZPOOL_CONFIG_L2CACHE;
}
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -932,12 +2248,12 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
/*
* Find an unused device we can add.
*/
- ztest_shared->zs_vdev_aux = 0;
+ zs->zs_vdev_aux = 0;
for (;;) {
char path[MAXPATHLEN];
int c;
(void) sprintf(path, ztest_aux_template, zopt_dir,
- zopt_pool, aux, ztest_shared->zs_vdev_aux);
+ zopt_pool, aux, zs->zs_vdev_aux);
for (c = 0; c < sav->sav_count; c++)
if (strcmp(sav->sav_vdevs[c]->vdev_path,
path) == 0)
@@ -945,7 +2261,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
if (c == sav->sav_count &&
vdev_lookup_by_path(rvd, path) == NULL)
break;
- ztest_shared->zs_vdev_aux++;
+ zs->zs_vdev_aux++;
}
}
@@ -968,28 +2284,126 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
* of devices that have pending state changes.
*/
if (ztest_random(2) == 0)
- (void) vdev_online(spa, guid, B_FALSE, NULL);
+ (void) vdev_online(spa, guid, 0, NULL);
error = spa_vdev_remove(spa, guid, B_FALSE);
if (error != 0 && error != EBUSY)
fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+}
+
+/*
+ * Split a pool if it has mirror top-level vdevs.
+ *
+ * Builds a split config that takes the first child of every mirror
+ * top-level vdev (log and hole vdevs are represented as holes, and a
+ * trailing run of them is trimmed), then calls spa_vdev_split_mirror()
+ * to create the pool "splitp". On success zs_splits is incremented and
+ * zs_mirrors decremented. Does nothing unless zs_mirrors >= 3 and
+ * zopt_raidz <= 1. 'zd' and 'id' are unused.
+ */
+/* ARGSUSED */
+void
+ztest_split_pool(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
+ nvlist_t *tree, **child, *config, *split, **schild;
+ uint_t c, children, schildren = 0, lastlogid = 0;
+ int error = 0;
+
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+
+ /* ensure we have a usable config; mirrors of raidz aren't supported */
+ if (zs->zs_mirrors < 3 || zopt_raidz > 1) {
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+
+ /* clean up the old pool, if any */
+ (void) spa_destroy("splitp");
+
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+ /* generate a config from the existing config */
+ mutex_enter(&spa->spa_props_lock);
+ VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
+ &tree) == 0);
+ mutex_exit(&spa->spa_props_lock);
+
+ VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+ &children) == 0);
+
+ /*
+ * NOTE(review): the malloc() result is not checked, and the array is
+ * sized by rvd->vdev_children while the loop below is bounded by
+ * 'children' from the nvlist -- assumes the two agree; confirm.
+ */
+ schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
+ for (c = 0; c < children; c++) {
+ vdev_t *tvd = rvd->vdev_child[c];
+ nvlist_t **mchild;
+ uint_t mchildren;
+
+ /* Log and hole vdevs become hole placeholders in the split config. */
+ if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
+ VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME,
+ 0) == 0);
+ VERIFY(nvlist_add_string(schild[schildren],
+ ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
+ VERIFY(nvlist_add_uint64(schild[schildren],
+ ZPOOL_CONFIG_IS_HOLE, 1) == 0);
+ /*
+ * Remember where the current run of placeholders starts.
+ * NOTE(review): 0 doubles as "no run", so a run starting
+ * at index 0 is not recorded -- confirm this cannot occur.
+ */
+ if (lastlogid == 0)
+ lastlogid = schildren;
+ ++schildren;
+ continue;
+ }
+ /* A real vdev ends any run of placeholders. */
+ lastlogid = 0;
+ VERIFY(nvlist_lookup_nvlist_array(child[c],
+ ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+ /* Take the first child of this top-level vdev for the new pool. */
+ VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
+ }
+
+ /* OK, create a config that can be used to split */
+ VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_ROOT) == 0);
+ /* Trailing placeholders are dropped by passing only the first lastlogid entries. */
+ VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
+ lastlogid != 0 ? lastlogid : schildren) == 0);
+
+ VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
+
+ /* schild and split are freed right after being added to the config. */
+ for (c = 0; c < schildren; c++)
+ nvlist_free(schild[c]);
+ free(schild);
+ nvlist_free(split);
+
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ (void) rw_wrlock(&zs->zs_name_lock);
+ error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
+ (void) rw_unlock(&zs->zs_name_lock);
+
+ nvlist_free(config);
+
+ if (error == 0) {
+ (void) printf("successful split - results:\n");
+ mutex_enter(&spa_namespace_lock);
+ show_pool_stats(spa);
+ show_pool_stats(spa_lookup("splitp"));
+ mutex_exit(&spa_namespace_lock);
+ /* Each successful split costs one mirror side. */
+ ++zs->zs_splits;
+ --zs->zs_mirrors;
+ }
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+
}
/*
* Verify that we can attach and detach devices.
*/
+/* ARGSUSED */
void
-ztest_vdev_attach_detach(ztest_args_t *za)
+ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
spa_aux_vdev_t *sav = &spa->spa_spares;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *pvd;
nvlist_t *root;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t leaf, top;
uint64_t ashift = ztest_get_ashift();
uint64_t oldguid, pguid;
@@ -1001,7 +2415,8 @@ ztest_vdev_attach_detach(ztest_args_t *za)
int oldvd_is_log;
int error, expected_error;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -1013,7 +2428,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
/*
* Pick a random top-level vdev.
*/
- top = ztest_random(rvd->vdev_children);
+ top = ztest_random_vdev_top(spa, B_TRUE);
/*
* Pick a random leaf within it.
@@ -1024,9 +2439,9 @@ ztest_vdev_attach_detach(ztest_args_t *za)
* Locate this vdev.
*/
oldvd = rvd->vdev_child[top];
- if (zopt_mirrors >= 1) {
+ if (zs->zs_mirrors >= 1) {
ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
- ASSERT(oldvd->vdev_children >= zopt_mirrors);
+ ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
oldvd = oldvd->vdev_child[leaf / zopt_raidz];
}
if (zopt_raidz > 1) {
@@ -1046,7 +2461,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
}
oldguid = oldvd->vdev_guid;
- oldsize = vdev_get_rsize(oldvd);
+ oldsize = vdev_get_min_asize(oldvd);
oldvd_is_log = oldvd->vdev_top->vdev_islog;
(void) strcpy(oldpath, oldvd->vdev_path);
pvd = oldvd->vdev_parent;
@@ -1061,7 +2476,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
if (error != 0 && error != ENODEV && error != EBUSY &&
error != ENOTSUP)
fatal(0, "detach (%s) returned %d", oldpath, error);
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
return;
}
@@ -1082,7 +2497,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
}
if (newvd) {
- newsize = vdev_get_rsize(newvd);
+ newsize = vdev_get_min_asize(newvd);
} else {
/*
* Make newsize a little bigger or smaller than oldsize.
@@ -1154,169 +2569,373 @@ ztest_vdev_attach_detach(ztest_args_t *za)
(longlong_t)newsize, replacing, error, expected_error);
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
}
/*
- * Verify that dynamic LUN growth works as expected.
+ * Callback function which expands the physical size of the vdev.
*/
-void
-ztest_vdev_LUN_growth(ztest_args_t *za)
+vdev_t *
+grow_vdev(vdev_t *vd, void *arg)
{
- spa_t *spa = za->za_spa;
- char dev_name[MAXPATHLEN];
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
- uint64_t vdev;
+ spa_t *spa = vd->vdev_spa;
+ size_t *newsize = arg;
size_t fsize;
int fd;
- (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+ ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
+ return (vd);
+
+ fsize = lseek(fd, 0, SEEK_END);
+ (void) ftruncate(fd, *newsize);
+
+ if (zopt_verbose >= 6) {
+ (void) printf("%s grew from %lu to %lu bytes\n",
+ vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
+ }
+ (void) close(fd);
+ return (NULL);
+}
+
+/*
+ * Callback function which expands a given vdev by calling vdev_online().
+ */
+/* ARGSUSED */
+vdev_t *
+online_vdev(vdev_t *vd, void *arg)
+{
+ spa_t *spa = vd->vdev_spa;
+ vdev_t *tvd = vd->vdev_top;
+ uint64_t guid = vd->vdev_guid;
+ uint64_t generation = spa->spa_config_generation + 1;
+ vdev_state_t newstate = VDEV_STATE_UNKNOWN;
+ int error;
+
+ ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ /* Calling vdev_online will initialize the new metaslabs */
+ spa_config_exit(spa, SCL_STATE, spa);
+ error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate);
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
/*
- * Pick a random leaf vdev.
+ * If vdev_online returned an error or the underlying vdev_open
+ * failed then we abort the expand. The only way to know that
+ * vdev_open fails is by checking the returned newstate.
*/
- spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
- spa_config_exit(spa, SCL_VDEV, FTAG);
+ if (error || newstate != VDEV_STATE_HEALTHY) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Unable to expand vdev, state %llu, "
+ "error %d\n", (u_longlong_t)newstate, error);
+ }
+ return (vd);
+ }
+ ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY);
+
+ /*
+ * Since we dropped the lock we need to ensure that we're
+ * still talking to the original vdev. It's possible this
+ * vdev may have been detached/replaced while we were
+ * trying to online it.
+ */
+ if (generation != spa->spa_config_generation) {
+ if (zopt_verbose >= 5) {
+ (void) printf("vdev configuration has changed, "
+ "guid %llu, state %llu, expected gen %llu, "
+ "got gen %llu\n",
+ (u_longlong_t)guid,
+ (u_longlong_t)tvd->vdev_state,
+ (u_longlong_t)generation,
+ (u_longlong_t)spa->spa_config_generation);
+ }
+ return (vd);
+ }
+ return (NULL);
+}
- (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+/*
+ * Traverse the vdev tree calling the supplied function.
+ * We continue to walk the tree until we either have walked all
+ * children or we receive a non-NULL return from the callback.
+ * If a NULL callback is passed, then we just return back the first
+ * leaf vdev we encounter.
+ */
+vdev_t *
+vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
+{
+ if (vd->vdev_ops->vdev_op_leaf) {
+ if (func == NULL)
+ return (vd);
+ else
+ return (func(vd, arg));
+ }
- if ((fd = open(dev_name, O_RDWR)) != -1) {
- /*
- * Determine the size.
- */
- fsize = lseek(fd, 0, SEEK_END);
+ for (uint_t c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
+ return (cvd);
+ }
+ return (NULL);
+}
- /*
- * If it's less than 2x the original size, grow by around 3%.
- */
- if (fsize < 2 * zopt_vdev_size) {
- size_t newsize = fsize + ztest_random(fsize / 32);
- (void) ftruncate(fd, newsize);
- if (zopt_verbose >= 6) {
- (void) printf("%s grew from %lu to %lu bytes\n",
- dev_name, (ulong_t)fsize, (ulong_t)newsize);
- }
+/*
+ * Verify that dynamic LUN growth works as expected.
+ *
+ * Picks a random top-level vdev, grows its leaf vdevs' backing files by
+ * 1/8 of the first leaf's psize, onlines them with expansion, waits for
+ * pending async tasks, and then checks that both the vdev's metaslab
+ * count and the metaslab class space have increased. The check is
+ * skipped when the vdev is unhealthy, has no valid psize, has already
+ * reached 4x zopt_vdev_size, or the configuration changed underneath
+ * us. 'zd' and 'id' are unused.
+ */
+/* ARGSUSED */
+void
+ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ vdev_t *vd, *tvd;
+ metaslab_class_t *mc;
+ metaslab_group_t *mg;
+ size_t psize, newsize;
+ uint64_t top;
+ uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
+
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+ top = ztest_random_vdev_top(spa, B_TRUE);
+
+ /* Record the "before" values to compare against after the expansion. */
+ tvd = spa->spa_root_vdev->vdev_child[top];
+ mg = tvd->vdev_mg;
+ mc = mg->mg_class;
+ old_ms_count = tvd->vdev_ms_count;
+ old_class_space = metaslab_class_get_space(mc);
+
+ /*
+ * Determine the size of the first leaf vdev associated with
+ * our top-level device.
+ */
+ vd = vdev_walk_tree(tvd, NULL, NULL);
+ ASSERT3P(vd, !=, NULL);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ psize = vd->vdev_psize;
+
+ /*
+ * We only try to expand the vdev if it's healthy, less than 4x its
+ * original size, and it has a valid psize.
+ */
+ if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
+ psize == 0 || psize >= 4 * zopt_vdev_size) {
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+ ASSERT(psize > 0);
+ newsize = psize + psize / 8;
+ ASSERT3U(newsize, >, psize);
+
+ if (zopt_verbose >= 6) {
+ (void) printf("Expanding LUN %s from %lu to %lu\n",
+ vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
+ }
+
+ /*
+ * Growing the vdev is a two step process:
+ * 1). expand the physical size (i.e. relabel)
+ * 2). online the vdev to create the new metaslabs
+ */
+ if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
+ vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
+ tvd->vdev_state != VDEV_STATE_HEALTHY) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Could not expand LUN because "
+ "the vdev configuration changed.\n");
}
- (void) close(fd);
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
}
- (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ spa_config_exit(spa, SCL_STATE, spa);
+
+ /*
+ * Expanding the LUN will update the config asynchronously,
+ * thus we must wait for the async thread to complete any
+ * pending tasks before proceeding.
+ */
+ for (;;) {
+ boolean_t done;
+ mutex_enter(&spa->spa_async_lock);
+ done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
+ mutex_exit(&spa->spa_async_lock);
+ if (done)
+ break;
+ txg_wait_synced(spa_get_dsl(spa), 0);
+ (void) poll(NULL, 0, 100);
+ }
+
+ spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+ /* Re-read the top-level vdev; the config lock was dropped above. */
+ tvd = spa->spa_root_vdev->vdev_child[top];
+ new_ms_count = tvd->vdev_ms_count;
+ new_class_space = metaslab_class_get_space(mc);
+
+ if (tvd->vdev_mg != mg || mg->mg_class != mc) {
+ if (zopt_verbose >= 5) {
+ (void) printf("Could not verify LUN expansion due to "
+ "intervening vdev offline or remove.\n");
+ }
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ return;
+ }
+
+ /*
+ * Make sure we were able to grow the vdev.
+ * The values are printed in the order of the failed comparison
+ * (new <= old) and cast to match the %llu conversions.
+ */
+ if (new_ms_count <= old_ms_count)
+ fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
+ (u_longlong_t)new_ms_count, (u_longlong_t)old_ms_count);
+
+ /*
+ * Make sure we were able to grow the pool.
+ */
+ if (new_class_space <= old_class_space)
+ fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
+ (u_longlong_t)new_class_space,
+ (u_longlong_t)old_class_space);
+
+ if (zopt_verbose >= 5) {
+ char oldnumbuf[6], newnumbuf[6];
+
+ nicenum(old_class_space, oldnumbuf);
+ nicenum(new_class_space, newnumbuf);
+ (void) printf("%s grew from %s to %s\n",
+ spa->spa_name, oldnumbuf, newnumbuf);
+ }
+
+ spa_config_exit(spa, SCL_STATE, spa);
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
}
+/*
+ * Verify that dmu_objset_{create,destroy,open,close} work as expected.
+ */
/* ARGSUSED */
static void
-ztest_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
+ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
/*
- * Create the directory object.
+ * Create the objects common to all ztest datasets.
*/
- VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
- DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
- DMU_OT_UINT64_OTHER, 5 * sizeof (ztest_block_tag_t), tx) == 0);
-
- VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
+ VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+}
- VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ,
- DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
+static int
+ztest_dataset_create(char *dsname)
+{
+ uint64_t zilset = ztest_random(100);
+ int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0,
+ ztest_objset_create_cb, NULL);
+
+ if (err || zilset < 80)
+ return (err);
+
+ (void) printf("Setting dataset %s to sync always\n", dsname);
+ return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC,
+ ZFS_SYNC_ALWAYS, B_FALSE));
}
+/* ARGSUSED */
static int
-ztest_destroy_cb(char *name, void *arg)
+ztest_objset_destroy_cb(const char *name, void *arg)
{
- ztest_args_t *za = arg;
objset_t *os;
- dmu_object_info_t *doi = &za->za_doi;
+ dmu_object_info_t doi;
int error;
/*
* Verify that the dataset contains a directory object.
*/
- error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &os);
- ASSERT3U(error, ==, 0);
- error = dmu_object_info(os, ZTEST_DIROBJ, doi);
+ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
+ error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
ASSERT3U(error, ==, 0);
- ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
- ASSERT3S(doi->doi_physical_blks, >=, 0);
+ ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
+ ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
- dmu_objset_close(os);
+ dmu_objset_rele(os, FTAG);
/*
* Destroy the dataset.
*/
- error = dmu_objset_destroy(name);
- if (error) {
- (void) dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &os);
- fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
- }
+ VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
return (0);
}
-/*
- * Verify that dmu_objset_{create,destroy,open,close} work as expected.
- */
-static uint64_t
-ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode)
+static boolean_t
+ztest_snapshot_create(char *osname, uint64_t id)
{
- itx_t *itx;
- lr_create_t *lr;
- size_t namesize;
- char name[24];
-
- (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object);
- namesize = strlen(name) + 1;
-
- itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize +
- ztest_random(ZIL_MAX_BLKSZ));
- lr = (lr_create_t *)&itx->itx_lr;
- bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr));
- lr->lr_doid = object;
- lr->lr_foid = 0;
- lr->lr_mode = mode;
- lr->lr_uid = 0;
- lr->lr_gid = 0;
- lr->lr_gen = dmu_tx_get_txg(tx);
- lr->lr_crtime[0] = time(NULL);
- lr->lr_crtime[1] = 0;
- lr->lr_rdev = 0;
- bcopy(name, (char *)(lr + 1), namesize);
-
- return (zil_itx_assign(zilog, itx, tx));
+ char snapname[MAXNAMELEN];
+ int error;
+
+ (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
+ (u_longlong_t)id);
+
+ error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1,
+ NULL, NULL, B_FALSE, B_FALSE, -1);
+ if (error == ENOSPC) {
+ ztest_record_enospc(FTAG);
+ return (B_FALSE);
+ }
+ if (error != 0 && error != EEXIST)
+ fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
+ return (B_TRUE);
}
+static boolean_t
+ztest_snapshot_destroy(char *osname, uint64_t id)
+{
+ char snapname[MAXNAMELEN];
+ int error;
+
+ (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
+ (u_longlong_t)id);
+
+ error = dmu_objset_destroy(snapname, B_FALSE);
+ if (error != 0 && error != ENOENT)
+ fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
void
-ztest_dmu_objset_create_destroy(ztest_args_t *za)
+ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
{
+ ztest_shared_t *zs = ztest_shared;
+ ztest_ds_t zdtmp;
+ int iters;
int error;
objset_t *os, *os2;
- char name[100];
- int basemode, expected_error;
+ char name[MAXNAMELEN];
zilog_t *zilog;
- uint64_t seq;
- uint64_t objects;
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
- (u_longlong_t)za->za_instance);
+ (void) rw_rdlock(&zs->zs_name_lock);
- basemode = DS_MODE_TYPE(za->za_instance);
- if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
- basemode = DS_MODE_USER;
+ (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu",
+ zs->zs_pool, (u_longlong_t)id);
/*
* If this dataset exists from a previous run, process its replay log
* half of the time. If we don't replay it, then dmu_objset_destroy()
- * (invoked from ztest_destroy_cb() below) should just throw it away.
+ * (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/
if (ztest_random(2) == 0 &&
- dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
- zil_replay(os, os, ztest_replay_vector);
- dmu_objset_close(os);
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
+ ztest_zd_init(&zdtmp, os);
+ zil_replay(os, &zdtmp, ztest_replay_vector);
+ ztest_zd_fini(&zdtmp);
+ dmu_objset_disown(os, FTAG);
}
/*
@@ -1324,170 +2943,152 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
* create lying around from a previous run. If so, destroy it
* and all of its snapshots.
*/
- (void) dmu_objset_find(name, ztest_destroy_cb, za,
+ (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
- error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
- if (error != ENOENT)
- fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
- name, os);
+ VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
/*
* Verify that we can create a new dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
- ztest_create_cb, NULL);
+ error = ztest_dataset_create(name);
if (error) {
if (error == ENOSPC) {
- ztest_record_enospc("dmu_objset_create");
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ ztest_record_enospc(FTAG);
+ (void) rw_unlock(&zs->zs_name_lock);
return;
}
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
- error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
- if (error) {
- fatal(0, "dmu_objset_open(%s) = %d", name, error);
- }
+ VERIFY3U(0, ==,
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
+
+ ztest_zd_init(&zdtmp, os);
/*
* Open the intent log for it.
*/
- zilog = zil_open(os, NULL);
+ zilog = zil_open(os, ztest_get_data);
/*
- * Put a random number of objects in there.
+ * Put some objects in there, do a little I/O to them,
+ * and randomly take a couple of snapshots along the way.
*/
- objects = ztest_random(20);
- seq = 0;
- while (objects-- != 0) {
- uint64_t object;
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name));
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- } else {
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, object, tx);
- seq = ztest_log_create(zilog, tx, object,
- DMU_OT_UINT64_OTHER);
- dmu_write(os, object, 0, sizeof (name), name, tx);
- dmu_tx_commit(tx);
- }
- if (ztest_random(5) == 0) {
- zil_commit(zilog, seq, object);
- }
- if (ztest_random(100) == 0) {
- error = zil_suspend(zilog);
- if (error == 0) {
- zil_resume(zilog);
- }
- }
+ iters = ztest_random(5);
+ for (int i = 0; i < iters; i++) {
+ ztest_dmu_object_alloc_free(&zdtmp, id);
+ if (ztest_random(iters) == 0)
+ (void) ztest_snapshot_create(name, i);
}
/*
* Verify that we cannot create an existing dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL);
- if (error != EEXIST)
- fatal(0, "created existing dataset, error = %d", error);
+ VERIFY3U(EEXIST, ==,
+ dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
/*
- * Verify that multiple dataset holds are allowed, but only when
- * the new access mode is compatible with the base mode.
+ * Verify that we can hold an objset that is also owned.
*/
- if (basemode == DS_MODE_OWNER) {
- error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
- &os2);
- if (error)
- fatal(0, "dmu_objset_open('%s') = %d", name, error);
- else
- dmu_objset_close(os2);
- }
- error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
- expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
- if (error != expected_error)
- fatal(0, "dmu_objset_open('%s') = %d, expected %d",
- name, error, expected_error);
- if (error == 0)
- dmu_objset_close(os2);
+ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
+ dmu_objset_rele(os2, FTAG);
- zil_close(zilog);
- dmu_objset_close(os);
+ /*
+ * Verify that we cannot own an objset that is already owned.
+ */
+ VERIFY3U(EBUSY, ==,
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
- error = dmu_objset_destroy(name);
- if (error)
- fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
+ zil_close(zilog);
+ dmu_objset_disown(os, FTAG);
+ ztest_zd_fini(&zdtmp);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_unlock(&zs->zs_name_lock);
}
/*
* Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
*/
void
-ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
+ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+
+ (void) rw_rdlock(&zs->zs_name_lock);
+ (void) ztest_snapshot_destroy(zd->zd_name, id);
+ (void) ztest_snapshot_create(zd->zd_name, id);
+ (void) rw_unlock(&zs->zs_name_lock);
+}
+
+/*
+ * Cleanup non-standard snapshots and clones.
+ */
+void
+ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
{
+ char snap1name[MAXNAMELEN];
+ char clone1name[MAXNAMELEN];
+ char snap2name[MAXNAMELEN];
+ char clone2name[MAXNAMELEN];
+ char snap3name[MAXNAMELEN];
int error;
- objset_t *os = za->za_os;
- char snapname[100];
- char osname[MAXNAMELEN];
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- dmu_objset_name(os, osname);
- (void) snprintf(snapname, 100, "%s@%llu", osname,
- (u_longlong_t)za->za_instance);
+ (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
+ (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
+ (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
+ (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
+ (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
- error = dmu_objset_destroy(snapname);
- if (error != 0 && error != ENOENT)
- fatal(0, "dmu_objset_destroy() = %d", error);
- error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
- NULL, FALSE);
- if (error == ENOSPC)
- ztest_record_enospc("dmu_take_snapshot");
- else if (error != 0 && error != EEXIST)
- fatal(0, "dmu_take_snapshot() = %d", error);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ error = dmu_objset_destroy(clone2name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
+ error = dmu_objset_destroy(snap3name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
+ error = dmu_objset_destroy(snap2name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
+ error = dmu_objset_destroy(clone1name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
+ error = dmu_objset_destroy(snap1name, B_FALSE);
+ if (error && error != ENOENT)
+ fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
}
/*
* Verify dsl_dataset_promote handles EBUSY
*/
void
-ztest_dsl_dataset_promote_busy(ztest_args_t *za)
+ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
{
- int error;
- objset_t *os = za->za_os;
+ ztest_shared_t *zs = ztest_shared;
objset_t *clone;
dsl_dataset_t *ds;
- char snap1name[100];
- char clone1name[100];
- char snap2name[100];
- char clone2name[100];
- char snap3name[100];
- char osname[MAXNAMELEN];
- static uint64_t uniq = 0;
- uint64_t curval;
+ char snap1name[MAXNAMELEN];
+ char clone1name[MAXNAMELEN];
+ char snap2name[MAXNAMELEN];
+ char clone2name[MAXNAMELEN];
+ char snap3name[MAXNAMELEN];
+ char *osname = zd->zd_name;
+ int error;
- curval = atomic_add_64_nv(&uniq, 5) - 5;
+ (void) rw_rdlock(&zs->zs_name_lock);
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
+ ztest_dsl_dataset_cleanup(osname, id);
- dmu_objset_name(os, osname);
- (void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval++);
- (void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval++);
- (void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval++);
- (void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval++);
- (void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval++);
+ (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id);
+ (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id);
+ (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id);
+ (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
+ (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1496,14 +3097,12 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
}
- error = dmu_objset_open(snap1name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &clone);
+ error = dmu_objset_hold(snap1name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
- error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
- dmu_objset_close(clone);
+ error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
+ dmu_objset_rele(clone, FTAG);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1513,7 +3112,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
}
error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1523,7 +3122,7 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
}
error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
- NULL, FALSE);
+ NULL, NULL, B_FALSE, B_FALSE, -1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@@ -1532,289 +3131,73 @@ ztest_dsl_dataset_promote_busy(ztest_args_t *za)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
}
- error = dmu_objset_open(snap3name, DMU_OST_OTHER,
- DS_MODE_USER | DS_MODE_READONLY, &clone);
+ error = dmu_objset_hold(snap3name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
- error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0,
- NULL, NULL);
- dmu_objset_close(clone);
+ error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
+ dmu_objset_rele(clone, FTAG);
if (error) {
if (error == ENOSPC) {
- ztest_record_enospc("dmu_objset_create");
+ ztest_record_enospc(FTAG);
goto out;
}
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
- error = dsl_dataset_own(snap1name, 0, FTAG, &ds);
+ error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
if (error)
- fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error);
- error = dsl_dataset_promote(clone2name);
+ fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
+ error = dsl_dataset_promote(clone2name, NULL);
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
dsl_dataset_disown(ds, FTAG);
out:
- error = dmu_objset_destroy(clone2name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
-
- error = dmu_objset_destroy(snap3name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
+ ztest_dsl_dataset_cleanup(osname, id);
- error = dmu_objset_destroy(snap2name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
-
- error = dmu_objset_destroy(clone1name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
- error = dmu_objset_destroy(snap1name);
- if (error && error != ENOENT)
- fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
-
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_unlock(&zs->zs_name_lock);
}
/*
* Verify that dmu_object_{alloc,free} work as expected.
*/
void
-ztest_dmu_object_alloc_free(ztest_args_t *za)
+ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_buf_t *db;
- dmu_tx_t *tx;
- uint64_t batchobj, object, batchsize, endoff, temp;
- int b, c, error, bonuslen;
- dmu_object_info_t *doi = &za->za_doi;
- char osname[MAXNAMELEN];
-
- dmu_objset_name(os, osname);
+ ztest_od_t od[4];
+ int batchsize = sizeof (od) / sizeof (od[0]);
- endoff = -8ULL;
- batchsize = 2;
-
- /*
- * Create a batch object if necessary, and record it in the directory.
- */
- VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj, DMU_READ_PREFETCH));
- if (batchobj == 0) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create a batch object");
- dmu_tx_abort(tx);
- return;
- }
- batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, batchobj, tx);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &batchobj, tx);
- dmu_tx_commit(tx);
- }
-
- /*
- * Destroy the previous batch of objects.
- */
- for (b = 0; b < batchsize; b++) {
- VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, DMU_READ_PREFETCH));
- if (object == 0)
- continue;
- /*
- * Read and validate contents.
- * We expect the nth byte of the bonus buffer to be n.
- */
- VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
- za->za_dbuf = db;
-
- dmu_object_info_from_db(db, doi);
- ASSERT(doi->doi_type == DMU_OT_UINT64_OTHER);
- ASSERT(doi->doi_bonus_type == DMU_OT_PLAIN_OTHER);
- ASSERT3S(doi->doi_physical_blks, >=, 0);
-
- bonuslen = doi->doi_bonus_size;
-
- for (c = 0; c < bonuslen; c++) {
- if (((uint8_t *)db->db_data)[c] !=
- (uint8_t)(c + bonuslen)) {
- fatal(0,
- "bad bonus: %s, obj %llu, off %d: %u != %u",
- osname, object, c,
- ((uint8_t *)db->db_data)[c],
- (uint8_t)(c + bonuslen));
- }
- }
-
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
-
- /*
- * We expect the word at endoff to be our object number.
- */
- VERIFY(0 == dmu_read(os, object, endoff,
- sizeof (uint64_t), &temp, DMU_READ_PREFETCH));
-
- if (temp != object) {
- fatal(0, "bad data in %s, got %llu, expected %llu",
- osname, temp, object);
- }
-
- /*
- * Destroy old object and clear batch entry.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj,
- b * sizeof (uint64_t), sizeof (uint64_t));
- dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("free object");
- dmu_tx_abort(tx);
- return;
- }
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- object = 0;
-
- dmu_object_set_checksum(os, batchobj,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, batchobj,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- dmu_tx_commit(tx);
- }
-
- /*
- * Before creating the new batch of objects, generate a bunch of churn.
- */
- for (b = ztest_random(100); b > 0; b--) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("churn objects");
- dmu_tx_abort(tx);
- return;
- }
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, object, tx);
- error = dmu_object_free(os, object, tx);
- if (error) {
- fatal(0, "dmu_object_free('%s', %llu) = %d",
- osname, object, error);
- }
- dmu_tx_commit(tx);
- }
+ for (int b = 0; b < batchsize; b++)
+ ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
/*
- * Create a new batch of objects with randomly chosen
- * blocksizes and record them in the batch directory.
+ * Destroy the previous batch of objects, create a new batch,
+ * and do some I/O on the new objects.
*/
- for (b = 0; b < batchsize; b++) {
- uint32_t va_blksize;
- u_longlong_t va_nblocks;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff,
- sizeof (uint64_t));
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create batchobj");
- dmu_tx_abort(tx);
- return;
- }
- bonuslen = (int)ztest_random(dmu_bonus_max()) + 1;
-
- object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_PLAIN_OTHER, bonuslen, tx);
-
- ztest_set_random_blocksize(os, object, tx);
-
- dmu_object_set_checksum(os, object,
- ztest_random_checksum(), tx);
- dmu_object_set_compress(os, object,
- ztest_random_compress(), tx);
-
- dmu_write(os, batchobj, b * sizeof (uint64_t),
- sizeof (uint64_t), &object, tx);
-
- /*
- * Write to both the bonus buffer and the regular data.
- */
- VERIFY(dmu_bonus_hold(os, object, FTAG, &db) == 0);
- za->za_dbuf = db;
- ASSERT3U(bonuslen, <=, db->db_size);
-
- dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
- ASSERT3S(va_nblocks, >=, 0);
-
- dmu_buf_will_dirty(db, tx);
-
- /*
- * See comments above regarding the contents of
- * the bonus buffer and the word at endoff.
- */
- for (c = 0; c < bonuslen; c++)
- ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
-
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
-
- /*
- * Write to a large offset to increase indirection.
- */
- dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx);
+ if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0)
+ return;
- dmu_tx_commit(tx);
- }
+ while (ztest_random(4 * batchsize) != 0)
+ ztest_io(zd, od[ztest_random(batchsize)].od_object,
+ ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
}
/*
* Verify that dmu_{read,write} work as expected.
*/
-typedef struct bufwad {
- uint64_t bw_index;
- uint64_t bw_txg;
- uint64_t bw_data;
-} bufwad_t;
-
-typedef struct dmu_read_write_dir {
- uint64_t dd_packobj;
- uint64_t dd_bigobj;
- uint64_t dd_chunk;
-} dmu_read_write_dir_t;
-
void
-ztest_dmu_read_write(ztest_args_t *za)
+ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_read_write_dir_t dd;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[2];
dmu_tx_t *tx;
int i, freeit, error;
uint64_t n, s, txg;
bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
- uint64_t packoff, packsize, bigoff, bigsize;
+ uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
+ uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t);
uint64_t regions = 997;
uint64_t stride = 123456789ULL;
uint64_t width = 40;
@@ -1847,34 +3230,16 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the directory info. If it's the first time, set things up.
*/
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd, DMU_READ_PREFETCH));
- if (dd.dd_chunk == 0) {
- ASSERT(dd.dd_packobj == 0);
- ASSERT(dd.dd_bigobj == 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create r/w directory");
- dmu_tx_abort(tx);
- return;
- }
-
- dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
- ztest_set_random_blocksize(os, dd.dd_packobj, tx);
- ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
- tx);
- dmu_tx_commit(tx);
- }
+ bigobj = od[0].od_object;
+ packobj = od[1].od_object;
+ chunksize = od[0].od_gen;
+ ASSERT(chunksize == od[1].od_gen);
/*
* Prefetch a random chunk of the big object.
@@ -1884,7 +3249,7 @@ ztest_dmu_read_write(ztest_args_t *za)
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
- dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk);
+ dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -1895,8 +3260,8 @@ ztest_dmu_read_write(ztest_args_t *za)
packoff = n * sizeof (bufwad_t);
packsize = s * sizeof (bufwad_t);
- bigoff = n * dd.dd_chunk;
- bigsize = s * dd.dd_chunk;
+ bigoff = n * chunksize;
+ bigsize = s * chunksize;
packbuf = umem_alloc(packsize, UMEM_NOFAIL);
bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
@@ -1910,10 +3275,10 @@ ztest_dmu_read_write(ztest_args_t *za)
/*
* Read the current contents of our objects.
*/
- error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf,
+ error = dmu_read(os, packobj, packoff, packsize, packbuf,
DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
- error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf,
+ error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
@@ -1922,24 +3287,25 @@ ztest_dmu_read_write(ztest_args_t *za)
*/
tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
+ dmu_tx_hold_write(tx, packobj, packoff, packsize);
if (freeit)
- dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize);
+ dmu_tx_hold_free(tx, bigobj, bigoff, bigsize);
else
- dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
+ dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
- error = dmu_tx_assign(tx, TXG_WAIT);
-
- if (error) {
- ztest_record_enospc("dmu r/w range");
- dmu_tx_abort(tx);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0) {
umem_free(packbuf, packsize);
umem_free(bigbuf, bigsize);
return;
}
- txg = dmu_tx_get_txg(tx);
+ dmu_object_set_checksum(os, bigobj,
+ (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx);
+
+ dmu_object_set_compress(os, bigobj,
+ (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx);
/*
* For each index from n to n + s, verify that the existing bufwad
@@ -1951,9 +3317,9 @@ ztest_dmu_read_write(ztest_args_t *za)
/* LINTED */
pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
/* LINTED */
- bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
/* LINTED */
- bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
@@ -1987,27 +3353,26 @@ ztest_dmu_read_write(ztest_args_t *za)
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
- dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
+ dmu_write(os, packobj, packoff, packsize, packbuf, tx);
if (freeit) {
- if (zopt_verbose >= 6) {
+ if (zopt_verbose >= 7) {
(void) printf("freeing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)bigoff,
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
- bigsize, tx));
+ VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx));
} else {
- if (zopt_verbose >= 6) {
+ if (zopt_verbose >= 7) {
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)bigoff,
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx);
+ dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx);
}
dmu_tx_commit(tx);
@@ -2019,9 +3384,9 @@ ztest_dmu_read_write(ztest_args_t *za)
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
- VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+ VERIFY(0 == dmu_read(os, packobj, packoff,
packsize, packcheck, DMU_READ_PREFETCH));
- VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+ VERIFY(0 == dmu_read(os, bigobj, bigoff,
bigsize, bigcheck, DMU_READ_PREFETCH));
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
@@ -2037,7 +3402,7 @@ ztest_dmu_read_write(ztest_args_t *za)
void
compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
- uint64_t bigsize, uint64_t n, dmu_read_write_dir_t dd, uint64_t txg)
+ uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
{
uint64_t i;
bufwad_t *pack;
@@ -2054,9 +3419,9 @@ compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
/* LINTED */
pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
/* LINTED */
- bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
+ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
/* LINTED */
- bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
+ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
@@ -2085,22 +3450,24 @@ compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
}
void
-ztest_dmu_read_write_zcopy(ztest_args_t *za)
+ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_read_write_dir_t dd;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[2];
dmu_tx_t *tx;
uint64_t i;
int error;
uint64_t n, s, txg;
bufwad_t *packbuf, *bigbuf;
- uint64_t packoff, packsize, bigoff, bigsize;
+ uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
+ uint64_t blocksize = ztest_random_blocksize();
+ uint64_t chunksize = blocksize;
uint64_t regions = 997;
uint64_t stride = 123456789ULL;
uint64_t width = 9;
dmu_buf_t *bonus_db;
arc_buf_t **bigbuf_arcbufs;
- dmu_object_info_t *doi = &za->za_doi;
+ dmu_object_info_t doi;
/*
* This test uses two objects, packobj and bigobj, that are always
@@ -2121,42 +3488,22 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
/*
* Read the directory info. If it's the first time, set things up.
*/
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (dd), &dd, DMU_READ_PREFETCH));
- if (dd.dd_chunk == 0) {
- ASSERT(dd.dd_packobj == 0);
- ASSERT(dd.dd_bigobj == 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
- dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create r/w directory");
- dmu_tx_abort(tx);
- return;
- }
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
- dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
- DMU_OT_NONE, 0, tx);
- ztest_set_random_blocksize(os, dd.dd_packobj, tx);
- ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
- VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
- ASSERT(doi->doi_data_block_size >= 2 * sizeof (bufwad_t));
- ASSERT(ISP2(doi->doi_data_block_size));
- dd.dd_chunk = doi->doi_data_block_size;
+ bigobj = od[0].od_object;
+ packobj = od[1].od_object;
+ blocksize = od[0].od_blocksize;
+ chunksize = blocksize;
+ ASSERT(chunksize == od[1].od_gen);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
- tx);
- dmu_tx_commit(tx);
- } else {
- VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
- VERIFY(ISP2(doi->doi_data_block_size));
- VERIFY(dd.dd_chunk == doi->doi_data_block_size);
- VERIFY(dd.dd_chunk >= 2 * sizeof (bufwad_t));
- }
+ VERIFY(dmu_object_info(os, bigobj, &doi) == 0);
+ VERIFY(ISP2(doi.doi_data_block_size));
+ VERIFY(chunksize == doi.doi_data_block_size);
+ VERIFY(chunksize >= 2 * sizeof (bufwad_t));
/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -2167,13 +3514,13 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
packoff = n * sizeof (bufwad_t);
packsize = s * sizeof (bufwad_t);
- bigoff = n * dd.dd_chunk;
- bigsize = s * dd.dd_chunk;
+ bigoff = n * chunksize;
+ bigsize = s * chunksize;
packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
- VERIFY(dmu_bonus_hold(os, dd.dd_bigobj, FTAG, &bonus_db) == 0);
+ VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));
bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
@@ -2199,15 +3546,12 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
for (j = 0; j < s; j++) {
if (i != 5) {
bigbuf_arcbufs[j] =
- dmu_request_arcbuf(bonus_db,
- dd.dd_chunk);
+ dmu_request_arcbuf(bonus_db, chunksize);
} else {
bigbuf_arcbufs[2 * j] =
- dmu_request_arcbuf(bonus_db,
- dd.dd_chunk / 2);
+ dmu_request_arcbuf(bonus_db, chunksize / 2);
bigbuf_arcbufs[2 * j + 1] =
- dmu_request_arcbuf(bonus_db,
- dd.dd_chunk / 2);
+ dmu_request_arcbuf(bonus_db, chunksize / 2);
}
}
@@ -2216,20 +3560,11 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
*/
tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
- dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
-
- if (ztest_random(100) == 0) {
- error = -1;
- } else {
- error = dmu_tx_assign(tx, TXG_WAIT);
- }
+ dmu_tx_hold_write(tx, packobj, packoff, packsize);
+ dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
- if (error) {
- if (error != -1) {
- ztest_record_enospc("dmu r/w range");
- }
- dmu_tx_abort(tx);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0) {
umem_free(packbuf, packsize);
umem_free(bigbuf, bigsize);
for (j = 0; j < s; j++) {
@@ -2247,54 +3582,52 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
return;
}
- txg = dmu_tx_get_txg(tx);
-
/*
* 50% of the time don't read objects in the 1st iteration to
* test dmu_assign_arcbuf() for the case when there're no
* existing dbufs for the specified offsets.
*/
if (i != 0 || ztest_random(2) != 0) {
- error = dmu_read(os, dd.dd_packobj, packoff,
+ error = dmu_read(os, packobj, packoff,
packsize, packbuf, DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
- error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize,
+ error = dmu_read(os, bigobj, bigoff, bigsize,
bigbuf, DMU_READ_PREFETCH);
ASSERT3U(error, ==, 0);
}
compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
- n, dd, txg);
+ n, chunksize, txg);
/*
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
- dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
- if (zopt_verbose >= 6) {
+ dmu_write(os, packobj, packoff, packsize, packbuf, tx);
+ if (zopt_verbose >= 7) {
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)bigoff,
(u_longlong_t)bigsize,
(u_longlong_t)txg);
}
- for (off = bigoff, j = 0; j < s; j++, off += dd.dd_chunk) {
+ for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
dmu_buf_t *dbt;
if (i != 5) {
bcopy((caddr_t)bigbuf + (off - bigoff),
- bigbuf_arcbufs[j]->b_data, dd.dd_chunk);
+ bigbuf_arcbufs[j]->b_data, chunksize);
} else {
bcopy((caddr_t)bigbuf + (off - bigoff),
bigbuf_arcbufs[2 * j]->b_data,
- dd.dd_chunk / 2);
+ chunksize / 2);
bcopy((caddr_t)bigbuf + (off - bigoff) +
- dd.dd_chunk / 2,
+ chunksize / 2,
bigbuf_arcbufs[2 * j + 1]->b_data,
- dd.dd_chunk / 2);
+ chunksize / 2);
}
if (i == 1) {
- VERIFY(dmu_buf_hold(os, dd.dd_bigobj, off,
- FTAG, &dbt) == 0);
+ VERIFY(dmu_buf_hold(os, bigobj, off,
+ FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
}
if (i != 5) {
dmu_assign_arcbuf(bonus_db, off,
@@ -2303,7 +3636,7 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
dmu_assign_arcbuf(bonus_db, off,
bigbuf_arcbufs[2 * j], tx);
dmu_assign_arcbuf(bonus_db,
- off + dd.dd_chunk / 2,
+ off + chunksize / 2,
bigbuf_arcbufs[2 * j + 1], tx);
}
if (i == 1) {
@@ -2319,9 +3652,9 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
- VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
+ VERIFY(0 == dmu_read(os, packobj, packoff,
packsize, packcheck, DMU_READ_PREFETCH));
- VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
+ VERIFY(0 == dmu_read(os, bigobj, bigoff,
bigsize, bigcheck, DMU_READ_PREFETCH));
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
@@ -2343,256 +3676,60 @@ ztest_dmu_read_write_zcopy(ztest_args_t *za)
umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
}
+/* ARGSUSED */
void
-ztest_dmu_check_future_leak(ztest_args_t *za)
+ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- dmu_buf_t *db;
- ztest_block_tag_t *bt;
- dmu_object_info_t *doi = &za->za_doi;
-
- /*
- * Make sure that, if there is a write record in the bonus buffer
- * of the ZTEST_DIROBJ, that the txg for this record is <= the
- * last synced txg of the pool.
- */
- VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
- za->za_dbuf = db;
- VERIFY(dmu_object_info(os, ZTEST_DIROBJ, doi) == 0);
- ASSERT3U(doi->doi_bonus_size, >=, sizeof (*bt));
- ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
- ASSERT3U(doi->doi_bonus_size % sizeof (*bt), ==, 0);
- bt = (void *)((char *)db->db_data + doi->doi_bonus_size - sizeof (*bt));
- if (bt->bt_objset != 0) {
- ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
- ASSERT3U(bt->bt_object, ==, ZTEST_DIROBJ);
- ASSERT3U(bt->bt_offset, ==, -1ULL);
- ASSERT3U(bt->bt_txg, <, spa_first_txg(za->za_spa));
- }
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
-}
-
-void
-ztest_dmu_write_parallel(ztest_args_t *za)
-{
- objset_t *os = za->za_os;
- ztest_block_tag_t *rbt = &za->za_rbt;
- ztest_block_tag_t *wbt = &za->za_wbt;
- const size_t btsize = sizeof (ztest_block_tag_t);
- dmu_buf_t *db;
- int b, error;
- int bs = ZTEST_DIROBJ_BLOCKSIZE;
- int do_free = 0;
- uint64_t off, txg, txg_how;
- mutex_t *lp;
- char osname[MAXNAMELEN];
- char iobuf[SPA_MAXBLOCKSIZE];
- blkptr_t blk = { 0 };
- uint64_t blkoff;
- zbookmark_t zb;
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_buf_t *bonus_db;
- arc_buf_t *abuf = NULL;
-
- dmu_objset_name(os, osname);
+ ztest_od_t od[1];
+ uint64_t offset = (1ULL << (ztest_random(20) + 43)) +
+ (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
/*
- * Have multiple threads write to large offsets in ZTEST_DIROBJ
- * to verify that having multiple threads writing to the same object
- * in parallel doesn't cause any trouble.
+ * Have multiple threads write to large offsets in an object
+ * to verify that parallel writes to an object -- even to the
+ * same blocks within the object -- don't cause any trouble.
*/
- if (ztest_random(4) == 0) {
- /*
- * Do the bonus buffer instead of a regular block.
- * We need a lock to serialize resize vs. others,
- * so we hash on the objset ID.
- */
- b = dmu_objset_id(os) % ZTEST_SYNC_LOCKS;
- off = -1ULL;
- dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
- } else {
- b = ztest_random(ZTEST_SYNC_LOCKS);
- off = za->za_diroff_shared + (b << SPA_MAXBLOCKSHIFT);
- if (ztest_random(4) == 0) {
- do_free = 1;
- dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs);
- } else {
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs);
- }
- }
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
- if (off != -1ULL && P2PHASE(off, bs) == 0 && !do_free &&
- ztest_random(8) == 0) {
- VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &bonus_db) == 0);
- abuf = dmu_request_arcbuf(bonus_db, bs);
- }
-
- txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
- error = dmu_tx_assign(tx, txg_how);
- if (error) {
- if (error == ERESTART) {
- ASSERT(txg_how == TXG_NOWAIT);
- dmu_tx_wait(tx);
- } else {
- ztest_record_enospc("dmu write parallel");
- }
- dmu_tx_abort(tx);
- if (abuf != NULL) {
- dmu_return_arcbuf(abuf);
- dmu_buf_rele(bonus_db, FTAG);
- }
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
- }
- txg = dmu_tx_get_txg(tx);
-
- lp = &ztest_shared->zs_sync_lock[b];
- (void) mutex_lock(lp);
-
- wbt->bt_objset = dmu_objset_id(os);
- wbt->bt_object = ZTEST_DIROBJ;
- wbt->bt_offset = off;
- wbt->bt_txg = txg;
- wbt->bt_thread = za->za_instance;
- wbt->bt_seq = ztest_shared->zs_seq[b]++; /* protected by lp */
-
- /*
- * Occasionally, write an all-zero block to test the behavior
- * of blocks that compress into holes.
- */
- if (off != -1ULL && ztest_random(8) == 0)
- bzero(wbt, btsize);
-
- if (off == -1ULL) {
- dmu_object_info_t *doi = &za->za_doi;
- char *dboff;
-
- VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
- za->za_dbuf = db;
- dmu_object_info_from_db(db, doi);
- ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
- ASSERT3U(doi->doi_bonus_size, >=, btsize);
- ASSERT3U(doi->doi_bonus_size % btsize, ==, 0);
- dboff = (char *)db->db_data + doi->doi_bonus_size - btsize;
- bcopy(dboff, rbt, btsize);
- if (rbt->bt_objset != 0) {
- ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
- ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
- ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
- ASSERT3U(rbt->bt_txg, <=, wbt->bt_txg);
- }
- if (ztest_random(10) == 0) {
- int newsize = (ztest_random(db->db_size /
- btsize) + 1) * btsize;
-
- ASSERT3U(newsize, >=, btsize);
- ASSERT3U(newsize, <=, db->db_size);
- VERIFY3U(dmu_set_bonus(db, newsize, tx), ==, 0);
- dboff = (char *)db->db_data + newsize - btsize;
- }
- dmu_buf_will_dirty(db, tx);
- bcopy(wbt, dboff, btsize);
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
- } else if (do_free) {
- VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0);
- } else if (abuf == NULL) {
- dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx);
- } else {
- bcopy(wbt, abuf->b_data, btsize);
- dmu_assign_arcbuf(bonus_db, off, abuf, tx);
- dmu_buf_rele(bonus_db, FTAG);
- }
- (void) mutex_unlock(lp);
-
- if (ztest_random(1000) == 0)
- (void) poll(NULL, 0, 1); /* open dn_notxholds window */
+ while (ztest_random(10) != 0)
+ ztest_io(zd, od[0].od_object, offset);
+}
- dmu_tx_commit(tx);
+void
+ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_od_t od[1];
+ uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) +
+ (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
+ uint64_t count = ztest_random(20) + 1;
+ uint64_t blocksize = ztest_random_blocksize();
+ void *data;
- if (ztest_random(10000) == 0)
- txg_wait_synced(dmu_objset_pool(os), txg);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
- if (off == -1ULL || do_free)
+ if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
- if (ztest_random(2) != 0)
+ if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0)
return;
- /*
- * dmu_sync() the block we just wrote.
- */
- (void) mutex_lock(lp);
-
- blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
- error = dmu_buf_hold(os, ZTEST_DIROBJ, blkoff, FTAG, &db);
- za->za_dbuf = db;
- if (error) {
- (void) mutex_unlock(lp);
- return;
- }
- blkoff = off - blkoff;
- error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
- dmu_buf_rele(db, FTAG);
- za->za_dbuf = NULL;
+ ztest_prealloc(zd, od[0].od_object, offset, count * blocksize);
- if (error) {
- (void) mutex_unlock(lp);
- return;
- }
+ data = umem_zalloc(blocksize, UMEM_NOFAIL);
- if (blk.blk_birth == 0) { /* concurrent free */
- (void) mutex_unlock(lp);
- return;
+ while (ztest_random(count) != 0) {
+ uint64_t randoff = offset + (ztest_random(count) * blocksize);
+ if (ztest_write(zd, od[0].od_object, randoff, blocksize,
+ data) != 0)
+ break;
+ while (ztest_random(4) != 0)
+ ztest_io(zd, od[0].od_object, randoff);
}
- txg_suspend(dmu_objset_pool(os));
-
- (void) mutex_unlock(lp);
-
- ASSERT(blk.blk_fill == 1);
- ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
- ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
- ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
-
- /*
- * Read the block that dmu_sync() returned to make sure its contents
- * match what we wrote. We do this while still txg_suspend()ed
- * to ensure that the block can't be reused before we read it.
- */
- zb.zb_objset = dmu_objset_id(os);
- zb.zb_object = ZTEST_DIROBJ;
- zb.zb_level = 0;
- zb.zb_blkid = off / bs;
- error = zio_wait(zio_read(NULL, za->za_spa, &blk, iobuf, bs,
- NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
- ASSERT3U(error, ==, 0);
-
- txg_resume(dmu_objset_pool(os));
-
- bcopy(&iobuf[blkoff], rbt, btsize);
-
- if (rbt->bt_objset == 0) /* concurrent free */
- return;
-
- if (wbt->bt_objset == 0) /* all-zero overwrite */
- return;
-
- ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
- ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
- ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
-
- /*
- * The semantic of dmu_sync() is that we always push the most recent
- * version of the data, so in the face of concurrent updates we may
- * see a newer version of the block. That's OK.
- */
- ASSERT3U(rbt->bt_txg, >=, wbt->bt_txg);
- if (rbt->bt_thread == wbt->bt_thread)
- ASSERT3U(rbt->bt_seq, ==, wbt->bt_seq);
- else
- ASSERT3U(rbt->bt_seq, >, wbt->bt_seq);
+ umem_free(data, blocksize);
}
/*
@@ -2603,9 +3740,10 @@ ztest_dmu_write_parallel(ztest_args_t *za)
#define ZTEST_ZAP_MAX_PROPS 1000
void
-ztest_zap(ztest_args_t *za)
+ztest_zap(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[1];
uint64_t object;
uint64_t txg, last_txg;
uint64_t value[ZTEST_ZAP_MAX_INTS];
@@ -2614,64 +3752,45 @@ ztest_zap(ztest_args_t *za)
dmu_tx_t *tx;
char propname[100], txgname[100];
int error;
- char osname[MAXNAMELEN];
char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
- dmu_objset_name(os, osname);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
- /*
- * Create a new object if necessary, and record it in the directory.
- */
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &object, DMU_READ_PREFETCH));
+ if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
+ return;
- if (object == 0) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t));
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create zap test obj");
- dmu_tx_abort(tx);
- return;
- }
- object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
- if (error) {
- fatal(0, "zap_create('%s', %llu) = %d",
- osname, object, error);
- }
- ASSERT(object != 0);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &object, tx);
- /*
- * Generate a known hash collision, and verify that
- * we can lookup and remove both entries.
- */
- for (i = 0; i < 2; i++) {
- value[i] = i;
- error = zap_add(os, object, hc[i], sizeof (uint64_t),
- 1, &value[i], tx);
- ASSERT3U(error, ==, 0);
- }
- for (i = 0; i < 2; i++) {
- error = zap_add(os, object, hc[i], sizeof (uint64_t),
- 1, &value[i], tx);
- ASSERT3U(error, ==, EEXIST);
- error = zap_length(os, object, hc[i],
- &zl_intsize, &zl_ints);
- ASSERT3U(error, ==, 0);
- ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
- ASSERT3U(zl_ints, ==, 1);
- }
- for (i = 0; i < 2; i++) {
- error = zap_remove(os, object, hc[i], tx);
- ASSERT3U(error, ==, 0);
- }
+ object = od[0].od_object;
- dmu_tx_commit(tx);
+ /*
+ * Generate a known hash collision, and verify that
+ * we can lookup and remove both entries.
+ */
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0)
+ return;
+ for (i = 0; i < 2; i++) {
+ value[i] = i;
+ VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t),
+ 1, &value[i], tx));
+ }
+ for (i = 0; i < 2; i++) {
+ VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i],
+ sizeof (uint64_t), 1, &value[i], tx));
+ VERIFY3U(0, ==,
+ zap_length(os, object, hc[i], &zl_intsize, &zl_ints));
+ ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+ ASSERT3U(zl_ints, ==, 1);
}
+ for (i = 0; i < 2; i++) {
+ VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx));
+ }
+ dmu_tx_commit(tx);
+ /*
+ * Generate a bunch of random entries.
+ */
ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
@@ -2715,14 +3834,10 @@ ztest_zap(ztest_args_t *za)
* should be txg + object + n.
*/
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create zap entry");
- dmu_tx_abort(tx);
+ dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0)
return;
- }
- txg = dmu_tx_get_txg(tx);
if (last_txg > txg)
fatal(0, "zap future leak: old %llu new %llu", last_txg, txg);
@@ -2730,16 +3845,10 @@ ztest_zap(ztest_args_t *za)
for (i = 0; i < ints; i++)
value[i] = txg + object + i;
- error = zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx);
- if (error)
- fatal(0, "zap_update('%s', %llu, '%s') = %d",
- osname, object, txgname, error);
-
- error = zap_update(os, object, propname, sizeof (uint64_t),
- ints, value, tx);
- if (error)
- fatal(0, "zap_update('%s', %llu, '%s') = %d",
- osname, object, propname, error);
+ VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t),
+ 1, &txg, tx));
+ VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t),
+ ints, value, tx));
dmu_tx_commit(tx);
@@ -2758,47 +3867,12 @@ ztest_zap(ztest_args_t *za)
ASSERT3U(error, ==, 0);
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("remove zap entry");
- dmu_tx_abort(tx);
+ dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0)
return;
- }
- error = zap_remove(os, object, txgname, tx);
- if (error)
- fatal(0, "zap_remove('%s', %llu, '%s') = %d",
- osname, object, txgname, error);
-
- error = zap_remove(os, object, propname, tx);
- if (error)
- fatal(0, "zap_remove('%s', %llu, '%s') = %d",
- osname, object, propname, error);
-
- dmu_tx_commit(tx);
-
- /*
- * Once in a while, destroy the object.
- */
- if (ztest_random(1000) != 0)
- return;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
- dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("destroy zap object");
- dmu_tx_abort(tx);
- return;
- }
- error = zap_destroy(os, object, tx);
- if (error)
- fatal(0, "zap_destroy('%s', %llu) = %d",
- osname, object, error);
- object = 0;
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
- &object, tx);
+ VERIFY3U(0, ==, zap_remove(os, object, txgname, tx));
+ VERIFY3U(0, ==, zap_remove(os, object, propname, tx));
dmu_tx_commit(tx);
}
@@ -2806,108 +3880,65 @@ ztest_zap(ztest_args_t *za)
* Testcase to test the upgrading of a microzap to fatzap.
*/
void
-ztest_fzap(ztest_args_t *za)
+ztest_fzap(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- uint64_t object;
- uint64_t value;
- dmu_tx_t *tx;
- int i, error;
- char osname[MAXNAMELEN];
- char *name = "aaa";
- char entname[MAXNAMELEN];
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[1];
+ uint64_t object, txg;
- dmu_objset_name(os, osname);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
- /*
- * Create a new object if necessary, and record it in the directory.
- */
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &object, DMU_READ_PREFETCH));
+ if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
+ return;
- if (object == 0) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t));
- dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create zap test obj");
- dmu_tx_abort(tx);
- return;
- }
- object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
- if (error) {
- fatal(0, "zap_create('%s', %llu) = %d",
- osname, object, error);
- }
- ASSERT(object != 0);
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
- sizeof (uint64_t), &object, tx);
- dmu_tx_commit(tx);
- }
+ object = od[0].od_object;
/*
- * Add entries to this ZAP amd make sure it spills over
+ * Add entries to this ZAP and make sure it spills over
* and gets upgraded to a fatzap. Also, since we are adding
- * 2050 entries we should see ptrtbl growth and leaf-block
- * split.
+ * 2050 entries we should see ptrtbl growth and leaf-block split.
*/
- for (i = 0; i < 2050; i++) {
- (void) snprintf(entname, sizeof (entname), "%s-%d", name, i);
- value = i;
+ for (int i = 0; i < 2050; i++) {
+ char name[MAXNAMELEN];
+ uint64_t value = i;
+ dmu_tx_t *tx;
+ int error;
- tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, entname);
- error = dmu_tx_assign(tx, TXG_WAIT);
+ (void) snprintf(name, sizeof (name), "fzap-%llu-%llu",
+ id, value);
- if (error) {
- ztest_record_enospc("create zap entry");
- dmu_tx_abort(tx);
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, B_TRUE, name);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0)
return;
- }
- error = zap_add(os, object, entname, sizeof (uint64_t),
- 1, &value, tx);
-
+ error = zap_add(os, object, name, sizeof (uint64_t), 1,
+ &value, tx);
ASSERT(error == 0 || error == EEXIST);
dmu_tx_commit(tx);
}
-
- /*
- * Once in a while, destroy the object.
- */
- if (ztest_random(1000) != 0)
- return;
-
- tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
- dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("destroy zap object");
- dmu_tx_abort(tx);
- return;
- }
- error = zap_destroy(os, object, tx);
- if (error)
- fatal(0, "zap_destroy('%s', %llu) = %d",
- osname, object, error);
- object = 0;
- dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
- &object, tx);
- dmu_tx_commit(tx);
}
+/* ARGSUSED */
void
-ztest_zap_parallel(ztest_args_t *za)
+ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[1];
uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
dmu_tx_t *tx;
int i, namelen, error;
+ int micro = ztest_random(2);
char name[20], string_value[20];
void *data;
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0);
+
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
+
+ object = od[0].od_object;
+
/*
* Generate a random name of the form 'xxx.....' where each
* x is a random printable character and the dots are dots.
@@ -2922,12 +3953,7 @@ ztest_zap_parallel(ztest_args_t *za)
name[i] = '.';
name[i] = '\0';
- if (ztest_random(2) == 0)
- object = ZTEST_MICROZAP_OBJ;
- else
- object = ZTEST_FATZAP_OBJ;
-
- if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) {
+ if ((namelen & 1) || micro) {
wsize = sizeof (txg);
wc = 1;
data = &txg;
@@ -2948,14 +3974,10 @@ ztest_zap_parallel(ztest_args_t *za)
if (i >= 2) {
tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("zap parallel");
- dmu_tx_abort(tx);
+ dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
+ txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
+ if (txg == 0)
return;
- }
- txg = dmu_tx_get_txg(tx);
bcopy(name, string_value, namelen);
} else {
tx = NULL;
@@ -3006,79 +4028,400 @@ ztest_zap_parallel(ztest_args_t *za)
dmu_tx_commit(tx);
}
+/*
+ * Commit callback data.
+ *
+ * One of these is allocated by ztest_create_cb_data() for every commit
+ * callback registered by ztest_dmu_commit_callbacks() and is handed to
+ * ztest_commit_callback() as its argument.
+ */
+typedef struct ztest_cb_data {
+ list_node_t zcd_node; /* linkage on zcl.zcl_callbacks */
+ uint64_t zcd_txg; /* txg the tx was assigned to; 0 if unassigned */
+ int zcd_expected_err; /* error the callback must be invoked with */
+ boolean_t zcd_added; /* set once inserted into zcl.zcl_callbacks */
+ boolean_t zcd_called; /* set by ztest_commit_callback() */
+ spa_t *zcd_spa; /* pool whose last synced txg is checked */
+} ztest_cb_data_t;
+
+/*
+ * This is the actual commit callback function.
+ *
+ * arg is the ztest_cb_data_t that was registered with the tx and error is
+ * the status the callback is invoked with.  The function verifies that
+ * error equals zcd_expected_err, that this callback has not run before and
+ * that zcd_txg is not newer than the pool's last synced txg.  When error is
+ * ECANCELED the data is only marked as called and is left allocated for the
+ * caller to inspect and free; otherwise the data is unlinked from
+ * zcl.zcl_callbacks (if it had been added) and freed here.
+ */
+static void
+ztest_commit_callback(void *arg, int error)
+{
+ ztest_cb_data_t *data = arg;
+ uint64_t synced_txg;
+
+ VERIFY(data != NULL);
+ VERIFY3S(data->zcd_expected_err, ==, error);
+ VERIFY(!data->zcd_called);
+
+ synced_txg = spa_last_synced_txg(data->zcd_spa);
+ if (data->zcd_txg > synced_txg)
+ fatal(0, "commit callback of txg %" PRIu64 " called prematurely"
+ ", last synced txg = %" PRIu64 "\n", data->zcd_txg,
+ synced_txg);
+
+ data->zcd_called = B_TRUE;
+
+ if (error == ECANCELED) {
+ ASSERT3U(data->zcd_txg, ==, 0);
+ ASSERT(!data->zcd_added);
+
+ /*
+ * The private callback data should be destroyed here, but
+ * since we are going to check the zcd_called field after
+ * dmu_tx_abort(), we will destroy it there.
+ */
+ return;
+ }
+
+ /*
+ * Was this callback added to the global callback list?  If not,
+ * there is nothing to unlink and the data can be freed directly.
+ */
+ if (!data->zcd_added)
+ goto out;
+
+ ASSERT3U(data->zcd_txg, !=, 0);
+
+ /* Remove our callback from the list, under the list's lock */
+ (void) mutex_lock(&zcl.zcl_callbacks_lock);
+ list_remove(&zcl.zcl_callbacks, data);
+ (void) mutex_unlock(&zcl.zcl_callbacks_lock);
+
+out:
+ umem_free(data, sizeof (ztest_cb_data_t));
+}
+
+/*
+ * Allocate and initialize callback data structure.
+ *
+ * The structure is zero-filled by umem_zalloc(), so zcd_expected_err,
+ * zcd_added and zcd_called all start out as zero; only zcd_txg (from the
+ * txg argument) and zcd_spa (the spa that os belongs to) are set here.
+ * Returns the new structure; it is released with umem_free() either by
+ * ztest_commit_callback() or by the abort path of
+ * ztest_dmu_commit_callbacks().
+ */
+static ztest_cb_data_t *
+ztest_create_cb_data(objset_t *os, uint64_t txg)
+{
+ ztest_cb_data_t *cb_data;
+
+ cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL);
+
+ cb_data->zcd_txg = txg;
+ cb_data->zcd_spa = dmu_objset_spa(os);
+
+ return (cb_data);
+}
+
+/*
+ * If a number of txgs equal to this threshold have been created after a commit
+ * callback has been registered but not called, then we assume there is an
+ * implementation bug.
+ */
+#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2)
+
+/*
+ * Commit callback test.
+ *
+ * Registers commit callbacks on a single transaction that writes the first
+ * uint64_t of the per-test object.  Roughly 1% of the time, or whenever
+ * dmu_tx_assign(TXG_NOWAIT) fails, the transaction is aborted and the two
+ * callbacks registered so far must have been invoked (with ECANCELED) by
+ * the time dmu_tx_abort() returns.  Otherwise a third callback is
+ * registered, the previously stored txg is checked for a "future leak",
+ * the open txg is written, and all three callbacks are inserted into the
+ * txg-ordered list zcl.zcl_callbacks before the tx is committed.  The head
+ * of that list is also checked against ZTEST_COMMIT_CALLBACK_THRESH to
+ * detect callbacks that were never called.
+ */
void
-ztest_dsl_prop_get_set(ztest_args_t *za)
+ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
+{
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[1];
+ dmu_tx_t *tx;
+ ztest_cb_data_t *cb_data[3], *tmp_cb;
+ uint64_t old_txg, txg;
+ /*
+ * error must start out as 0: it is only assigned below when we
+ * decide to abort on purpose, and is then tested with !error.
+ */
+ int i, error = 0;
+
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
+
+ tx = dmu_tx_create(os);
+
+ cb_data[0] = ztest_create_cb_data(os, 0);
+ dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]);
+
+ dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t));
+
+ /* Every once in a while, abort the transaction on purpose */
+ if (ztest_random(100) == 0)
+ error = -1;
+
+ if (!error)
+ error = dmu_tx_assign(tx, TXG_NOWAIT);
+
+ txg = error ? 0 : dmu_tx_get_txg(tx);
+
+ cb_data[0]->zcd_txg = txg;
+ cb_data[1] = ztest_create_cb_data(os, txg);
+ dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]);
+
+ if (error) {
+ /*
+ * It's not a strict requirement to call the registered
+ * callbacks from inside dmu_tx_abort(), but that's what
+ * is supposed to happen in the current implementation
+ * so we will check for that.
+ */
+ for (i = 0; i < 2; i++) {
+ cb_data[i]->zcd_expected_err = ECANCELED;
+ VERIFY(!cb_data[i]->zcd_called);
+ }
+
+ dmu_tx_abort(tx);
+
+ /*
+ * ztest_commit_callback() leaves the data allocated on
+ * ECANCELED precisely so that it can be checked and freed here.
+ */
+ for (i = 0; i < 2; i++) {
+ VERIFY(cb_data[i]->zcd_called);
+ umem_free(cb_data[i], sizeof (ztest_cb_data_t));
+ }
+
+ return;
+ }
+
+ cb_data[2] = ztest_create_cb_data(os, txg);
+ dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]);
+
+ /*
+ * Read existing data to make sure there isn't a future leak.
+ */
+ VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t),
+ &old_txg, DMU_READ_PREFETCH));
+
+ if (old_txg > txg)
+ fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64,
+ old_txg, txg);
+
+ dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx);
+
+ (void) mutex_lock(&zcl.zcl_callbacks_lock);
+
+ /*
+ * Since commit callbacks don't have any ordering requirement and since
+ * it is theoretically possible for a commit callback to be called
+ * after an arbitrary amount of time has elapsed since its txg has been
+ * synced, it is difficult to reliably determine whether a commit
+ * callback hasn't been called due to high load or due to a flawed
+ * implementation.
+ *
+ * In practice, we will assume that if after a certain number of txgs a
+ * commit callback hasn't been called, then most likely there's an
+ * implementation bug.
+ *
+ * The head of the list is the oldest callback still outstanding; it
+ * is too old when its txg lags the open txg by more than the
+ * threshold.  The threshold is added to the old txg rather than
+ * subtracted from the open txg so that a small txg cannot wrap.
+ */
+ tmp_cb = list_head(&zcl.zcl_callbacks);
+ if (tmp_cb != NULL &&
+ tmp_cb->zcd_txg + ZTEST_COMMIT_CALLBACK_THRESH < txg) {
+ fatal(0, "Commit callback threshold exceeded, oldest txg: %"
+ PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
+ }
+
+ /*
+ * Let's find the place to insert our callbacks.
+ *
+ * Even though the list is ordered by txg, it is possible for the
+ * insertion point to not be the end because our txg may already be
+ * quiescing at this point and other callbacks in the open txg
+ * (from other objsets) may have sneaked in.
+ */
+ tmp_cb = list_tail(&zcl.zcl_callbacks);
+ while (tmp_cb != NULL && tmp_cb->zcd_txg > txg)
+ tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb);
+
+ /* Add the 3 callbacks to the list */
+ for (i = 0; i < 3; i++) {
+ if (tmp_cb == NULL)
+ list_insert_head(&zcl.zcl_callbacks, cb_data[i]);
+ else
+ list_insert_after(&zcl.zcl_callbacks, tmp_cb,
+ cb_data[i]);
+
+ cb_data[i]->zcd_added = B_TRUE;
+ VERIFY(!cb_data[i]->zcd_called);
+
+ tmp_cb = cb_data[i];
+ }
+
+ (void) mutex_unlock(&zcl.zcl_callbacks_lock);
+
+ dmu_tx_commit(tx);
+}
+
+/*
+ * Set the checksum, compression, copies and dedup properties of the
+ * dataset named zd->zd_name, each to a value obtained from
+ * ztest_random_dsl_prop() for that property.  The last argument passed to
+ * ztest_dsl_prop_set_uint64() is a random 0 or 1 (presumably an "inherit"
+ * flag -- confirm against that helper).  The helper's return value is
+ * ignored and the whole loop runs with zs_name_lock held as a reader.
+ * The id argument is unused.
+ */
+/* ARGSUSED */
+void
+ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
+{
+ zfs_prop_t proplist[] = {
+ ZFS_PROP_CHECKSUM,
+ ZFS_PROP_COMPRESSION,
+ ZFS_PROP_COPIES,
+ ZFS_PROP_DEDUP
+ };
+ ztest_shared_t *zs = ztest_shared;
+
+ (void) rw_rdlock(&zs->zs_name_lock);
+
+ for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
+ (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
+ ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
+
+ (void) rw_unlock(&zs->zs_name_lock);
+}
+
+/*
+ * Set the pool's dedupditto property to ZIO_DEDUPDITTO_MIN plus
+ * ztest_random(ZIO_DEDUPDITTO_MIN), then fetch the pool properties with
+ * spa_prop_get(), dump them when zopt_verbose >= 6, and free the nvlist.
+ * The result of the set is ignored; the get must succeed.  Everything
+ * runs with zs_name_lock held as a reader.  Neither zd nor id is used.
+ */
+/* ARGSUSED */
+void
+ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ nvlist_t *props = NULL;
+
+ (void) rw_rdlock(&zs->zs_name_lock);
+
+ (void) ztest_spa_prop_set_uint64(zs, ZPOOL_PROP_DEDUPDITTO,
+ ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
+
+ VERIFY3U(spa_prop_get(zs->zs_spa, &props), ==, 0);
+
+ if (zopt_verbose >= 6)
+ dump_nvlist(props, 4);
+
+ nvlist_free(props);
+
+ (void) rw_unlock(&zs->zs_name_lock);
+}
+
+/*
+ * Test snapshot hold/release and deferred destroy.
+ *
+ * Works on the snapshot "<osname>@sh1_<id>", the clone "<osname>/ch1_<id>"
+ * and the user hold tag "tag_<id>", all derived from the objset name of
+ * zd->zd_os and from id.  Two scenarios are run with zs_name_lock held as
+ * a reader:
+ *
+ * 1. snapshot, clone the snapshot, deferred-destroy the snapshot, destroy
+ *    the clone, and verify that the snapshot is gone (ENOENT);
+ * 2. snapshot, place a user hold, verify that a plain destroy fails with
+ *    EBUSY, deferred-destroy, release the hold, and verify that the
+ *    snapshot is gone.
+ *
+ * ENOSPC from snapshot or clone creation is recorded and ends the test
+ * early; any other unexpected result is fatal.
+ */
+void
+ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
{
- objset_t *os = za->za_os;
- int i, inherit;
- uint64_t value;
- const char *prop, *valname;
- char setpoint[MAXPATHLEN];
- char osname[MAXNAMELEN];
int error;
+ objset_t *os = zd->zd_os;
+ objset_t *origin;
+ char snapname[100];
+ char fullname[100];
+ char clonename[100];
+ char tag[100];
+ char osname[MAXNAMELEN];
(void) rw_rdlock(&ztest_shared->zs_name_lock);
dmu_objset_name(os, osname);
- for (i = 0; i < 2; i++) {
- if (i == 0) {
- prop = "checksum";
- value = ztest_random_checksum();
- inherit = (value == ZIO_CHECKSUM_INHERIT);
- } else {
- prop = "compression";
- value = ztest_random_compress();
- inherit = (value == ZIO_COMPRESS_INHERIT);
+ /*
+ * id is a uint64_t, so cast it to match %llu, as the rest of this
+ * file does.  Buffer sizes come from sizeof rather than a literal.
+ */
+ (void) snprintf(snapname, sizeof (snapname), "sh1_%llu",
+ (u_longlong_t)id);
+ (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
+ (void) snprintf(clonename, sizeof (clonename), "%s/ch1_%llu", osname,
+ (u_longlong_t)id);
+ /*
+ * The tag is the literal text "tag_" followed by id.  It must not
+ * start with '%': "%ta" would be parsed as a conversion specification.
+ */
+ (void) snprintf(tag, sizeof (tag), "tag_%llu", (u_longlong_t)id);
+
+ /*
+ * Clean up from any previous run.
+ */
+ (void) dmu_objset_destroy(clonename, B_FALSE);
+ (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
+ (void) dmu_objset_destroy(fullname, B_FALSE);
+
+ /*
+ * Create snapshot, clone it, mark snap for deferred destroy,
+ * destroy clone, verify snap was also destroyed.
+ */
+ error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
+ FALSE, -1);
+ if (error) {
+ if (error == ENOSPC) {
+ ztest_record_enospc("dmu_objset_snapshot");
+ goto out;
}
+ fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
+ }
- error = dsl_prop_set(osname, prop, sizeof (value),
- !inherit, &value);
+ error = dmu_objset_hold(fullname, FTAG, &origin);
+ if (error)
+ fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
+ error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0);
+ dmu_objset_rele(origin, FTAG);
+ if (error) {
if (error == ENOSPC) {
- ztest_record_enospc("dsl_prop_set");
- break;
+ ztest_record_enospc("dmu_objset_clone");
+ goto out;
}
+ fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
+ }
- ASSERT3U(error, ==, 0);
+ error = dmu_objset_destroy(fullname, B_TRUE);
+ if (error) {
+ fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+ fullname, error);
+ }
- VERIFY3U(dsl_prop_get(osname, prop, sizeof (value),
- 1, &value, setpoint), ==, 0);
+ error = dmu_objset_destroy(clonename, B_FALSE);
+ if (error)
+ fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error);
- if (i == 0)
- valname = zio_checksum_table[value].ci_name;
- else
- valname = zio_compress_table[value].ci_name;
+ error = dmu_objset_hold(fullname, FTAG, &origin);
+ if (error != ENOENT)
+ fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
- if (zopt_verbose >= 6) {
- (void) printf("%s %s = %s for '%s'\n",
- osname, prop, valname, setpoint);
+ /*
+ * Create snapshot, add temporary hold, verify that we can't
+ * destroy a held snapshot, mark for deferred destroy,
+ * release hold, verify snapshot was destroyed.
+ */
+ error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
+ FALSE, -1);
+ if (error) {
+ if (error == ENOSPC) {
+ ztest_record_enospc("dmu_objset_snapshot");
+ goto out;
}
+ fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
+ }
+
+ error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
+ B_TRUE, -1);
+ if (error) {
+ fatal(0, "dsl_dataset_user_hold(%s, %s) = %d",
+ fullname, tag, error);
+ }
+
+ error = dmu_objset_destroy(fullname, B_FALSE);
+ if (error != EBUSY) {
+ fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d",
+ fullname, error);
+ }
+
+ error = dmu_objset_destroy(fullname, B_TRUE);
+ if (error) {
+ fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
+ fullname, error);
}
+ error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
+ if (error) {
+ fatal(0, "dsl_dataset_user_release(%s, %s) = %d",
+ fullname, tag, error);
+ }
+
+ VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
+
+out:
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
/*
* Inject random faults into the on-disk data.
*/
+/* ARGSUSED */
void
-ztest_fault_inject(ztest_args_t *za)
+ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
{
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
int fd;
uint64_t offset;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t bad = 0x1990c0ffeedecadeULL;
uint64_t top, leaf;
char path0[MAXPATHLEN];
char pathrand[MAXPATHLEN];
size_t fsize;
- spa_t *spa = za->za_spa;
int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
int iters = 1000;
- int maxfaults = zopt_maxfaults;
+ int maxfaults;
+ int mirror_save;
vdev_t *vd0 = NULL;
uint64_t guid0 = 0;
+ boolean_t islog = B_FALSE;
+
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ maxfaults = MAXFAULTS();
+ leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
+ mirror_save = zs->zs_mirrors;
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
ASSERT(leaves >= 1);
@@ -3089,10 +4432,10 @@ ztest_fault_inject(ztest_args_t *za)
if (ztest_random(2) == 0) {
/*
- * Inject errors on a normal data device.
+ * Inject errors on a normal data device or slog device.
*/
- top = ztest_random(spa->spa_root_vdev->vdev_children);
- leaf = ztest_random(leaves);
+ top = ztest_random_vdev_top(spa, B_TRUE);
+ leaf = ztest_random(leaves) + zs->zs_splits;
/*
* Generate paths to the first leaf in this top-level vdev,
@@ -3101,11 +4444,14 @@ ztest_fault_inject(ztest_args_t *za)
* and we'll write random garbage to the randomly chosen leaf.
*/
(void) snprintf(path0, sizeof (path0), ztest_dev_template,
- zopt_dir, zopt_pool, top * leaves + 0);
+ zopt_dir, zopt_pool, top * leaves + zs->zs_splits);
(void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
zopt_dir, zopt_pool, top * leaves + leaf);
vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
+ if (vd0 != NULL && vd0->vdev_top->vdev_islog)
+ islog = B_TRUE;
+
if (vd0 != NULL && maxfaults != 1) {
/*
* Make vd0 explicitly claim to be unreadable,
@@ -3151,22 +4497,38 @@ ztest_fault_inject(ztest_args_t *za)
spa_config_exit(spa, SCL_STATE, FTAG);
- if (maxfaults == 0)
- return;
-
/*
- * If we can tolerate two or more faults, randomly online/offline vd0.
+ * If we can tolerate two or more faults, or we're dealing
+ * with a slog, randomly online/offline vd0.
*/
- if (maxfaults >= 2 && guid0 != 0) {
+ if ((maxfaults >= 2 || islog) && guid0 != 0) {
if (ztest_random(10) < 6) {
int flags = (ztest_random(2) == 0 ?
ZFS_OFFLINE_TEMPORARY : 0);
+
+ /*
+ * We have to grab the zs_name_lock as writer to
+ * prevent a race between offlining a slog and
+ * destroying a dataset. Offlining the slog will
+ * grab a reference on the dataset which may cause
+ * dmu_objset_destroy() to fail with EBUSY thus
+ * leaving the dataset in an inconsistent state.
+ */
+ if (islog)
+ (void) rw_wrlock(&ztest_shared->zs_name_lock);
+
VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
+
+ if (islog)
+ (void) rw_unlock(&ztest_shared->zs_name_lock);
} else {
(void) vdev_online(spa, guid0, 0, NULL);
}
}
+ if (maxfaults == 0)
+ return;
+
/*
* We have at least single-fault tolerance, so inject data corruption.
*/
@@ -3185,173 +4547,198 @@ ztest_fault_inject(ztest_args_t *za)
if (offset >= fsize)
continue;
- if (zopt_verbose >= 6)
- (void) printf("injecting bad word into %s,"
- " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ if (mirror_save != zs->zs_mirrors) {
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ (void) close(fd);
+ return;
+ }
if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
fatal(1, "can't inject bad word at 0x%llx in %s",
offset, pathrand);
+
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+
+ if (zopt_verbose >= 7)
+ (void) printf("injected bad word into %s,"
+ " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
}
(void) close(fd);
}
/*
- * Scrub the pool.
+ * Verify that DDT repair works as expected.
*/
void
-ztest_scrub(ztest_args_t *za)
+ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
{
- spa_t *spa = za->za_spa;
-
- (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
- (void) poll(NULL, 0, 1000); /* wait a second, then force a restart */
- (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
-}
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
+ objset_t *os = zd->zd_os;
+ ztest_od_t od[1];
+ uint64_t object, blocksize, txg, pattern, psize;
+ enum zio_checksum checksum = spa_dedup_checksum(spa);
+ dmu_buf_t *db;
+ dmu_tx_t *tx;
+ void *buf;
+ blkptr_t blk;
+ int copies = 2 * ZIO_DEDUPDITTO_MIN;
-/*
- * Rename the pool to a different name and then rename it back.
- */
-void
-ztest_spa_rename(ztest_args_t *za)
-{
- char *oldname, *newname;
- int error;
- spa_t *spa;
+ blocksize = ztest_random_blocksize();
+ blocksize = MIN(blocksize, 2048); /* because we write so many */
- (void) rw_wrlock(&ztest_shared->zs_name_lock);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
- oldname = za->za_pool;
- newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
- (void) strcpy(newname, oldname);
- (void) strcat(newname, "_tmp");
+ if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
+ return;
/*
- * Do the rename
+ * Take the name lock as writer to prevent anyone else from changing
+ * the pool and dataset properties we need to maintain during this test.
*/
- error = spa_rename(oldname, newname);
- if (error)
- fatal(0, "spa_rename('%s', '%s') = %d", oldname,
- newname, error);
+ (void) rw_wrlock(&zs->zs_name_lock);
- /*
- * Try to open it under the old name, which shouldn't exist
- */
- error = spa_open(oldname, &spa, FTAG);
- if (error != ENOENT)
- fatal(0, "spa_open('%s') = %d", oldname, error);
+ if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
+ B_FALSE) != 0 ||
+ ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
+ B_FALSE) != 0) {
+ (void) rw_unlock(&zs->zs_name_lock);
+ return;
+ }
+
+ object = od[0].od_object;
+ blocksize = od[0].od_blocksize;
+ pattern = spa_guid(spa) ^ dmu_objset_fsid_guid(os);
+
+ ASSERT(object != 0);
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, object, 0, copies * blocksize);
+ txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
+ if (txg == 0) {
+ (void) rw_unlock(&zs->zs_name_lock);
+ return;
+ }
/*
- * Open it under the new name and make sure it's still the same spa_t.
+ * Write all the copies of our block.
*/
- error = spa_open(newname, &spa, FTAG);
- if (error != 0)
- fatal(0, "spa_open('%s') = %d", newname, error);
+ for (int i = 0; i < copies; i++) {
+ uint64_t offset = i * blocksize;
+ VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db,
+ DMU_READ_NO_PREFETCH) == 0);
+ ASSERT(db->db_offset == offset);
+ ASSERT(db->db_size == blocksize);
+ ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
+ ztest_pattern_match(db->db_data, db->db_size, 0ULL));
+ dmu_buf_will_fill(db, tx);
+ ztest_pattern_set(db->db_data, db->db_size, pattern);
+ dmu_buf_rele(db, FTAG);
+ }
- ASSERT(spa == za->za_spa);
- spa_close(spa, FTAG);
+ dmu_tx_commit(tx);
+ txg_wait_synced(spa_get_dsl(spa), txg);
/*
- * Rename it back to the original
+ * Find out what block we got.
*/
- error = spa_rename(newname, oldname);
- if (error)
- fatal(0, "spa_rename('%s', '%s') = %d", newname,
- oldname, error);
+ VERIFY(dmu_buf_hold(os, object, 0, FTAG, &db,
+ DMU_READ_NO_PREFETCH) == 0);
+ blk = *((dmu_buf_impl_t *)db)->db_blkptr;
+ dmu_buf_rele(db, FTAG);
/*
- * Make sure it can still be opened
+ * Damage the block. Dedup-ditto will save us when we read it later.
*/
- error = spa_open(oldname, &spa, FTAG);
- if (error != 0)
- fatal(0, "spa_open('%s') = %d", oldname, error);
+ psize = BP_GET_PSIZE(&blk);
+ buf = zio_buf_alloc(psize);
+ ztest_pattern_set(buf, psize, ~pattern);
- ASSERT(spa == za->za_spa);
- spa_close(spa, FTAG);
+ (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
+ buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
- umem_free(newname, strlen(newname) + 1);
+ zio_buf_free(buf, psize);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ (void) rw_unlock(&zs->zs_name_lock);
}
-
/*
- * Completely obliterate one disk.
+ * Scrub the pool.
*/
-static void
-ztest_obliterate_one_disk(uint64_t vdev)
+/* ARGSUSED */
+void
+ztest_scrub(ztest_ds_t *zd, uint64_t id)
{
- int fd;
- char dev_name[MAXPATHLEN], copy_name[MAXPATHLEN];
- size_t fsize;
+ ztest_shared_t *zs = ztest_shared;
+ spa_t *spa = zs->zs_spa;
- if (zopt_maxfaults < 2)
- return;
+ (void) spa_scan(spa, POOL_SCAN_SCRUB);
+ (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
+ (void) spa_scan(spa, POOL_SCAN_SCRUB);
+}
- (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
- (void) snprintf(copy_name, MAXPATHLEN, "%s.old", dev_name);
+/*
+ * Rename the pool to a different name and then rename it back.
+ */
+/* ARGSUSED */
+void
+ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ char *oldname, *newname;
+ spa_t *spa;
- fd = open(dev_name, O_RDWR);
+ (void) rw_wrlock(&zs->zs_name_lock);
- if (fd == -1)
- fatal(1, "can't open %s", dev_name);
+ oldname = zs->zs_pool;
+ newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
+ (void) strcpy(newname, oldname);
+ (void) strcat(newname, "_tmp");
/*
- * Determine the size.
+ * Do the rename
*/
- fsize = lseek(fd, 0, SEEK_END);
-
- (void) close(fd);
+ VERIFY3U(0, ==, spa_rename(oldname, newname));
/*
- * Rename the old device to dev_name.old (useful for debugging).
+ * Try to open it under the old name, which shouldn't exist
*/
- VERIFY(rename(dev_name, copy_name) == 0);
+ VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
/*
- * Create a new one.
+ * Open it under the new name and make sure it's still the same spa_t.
*/
- VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0);
- VERIFY(ftruncate(fd, fsize) == 0);
- (void) close(fd);
-}
+ VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
-static void
-ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
-{
- char dev_name[MAXPATHLEN];
- nvlist_t *root;
- int error;
- uint64_t guid;
- vdev_t *vd;
+ ASSERT(spa == zs->zs_spa);
+ spa_close(spa, FTAG);
- (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
+ /*
+ * Rename it back to the original
+ */
+ VERIFY3U(0, ==, spa_rename(newname, oldname));
/*
- * Build the nvlist describing dev_name.
+ * Make sure it can still be opened
*/
- root = make_vdev_root(dev_name, NULL, 0, 0, 0, 0, 0, 1);
+ VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
- spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
- guid = 0;
- else
- guid = vd->vdev_guid;
- spa_config_exit(spa, SCL_VDEV, FTAG);
- error = spa_vdev_attach(spa, guid, root, B_TRUE);
- if (error != 0 &&
- error != EBUSY &&
- error != ENOTSUP &&
- error != ENODEV &&
- error != EDOM)
- fatal(0, "spa_vdev_attach(in-place) = %d", error);
+ ASSERT(spa == zs->zs_spa);
+ spa_close(spa, FTAG);
- nvlist_free(root);
+ umem_free(newname, strlen(newname) + 1);
+
+ (void) rw_unlock(&zs->zs_name_lock);
}
+/*
+ * Verify pool integrity by running zdb.
+ */
static void
-ztest_verify_blocks(char *pool)
+ztest_run_zdb(char *pool)
{
int status;
char zdb[MAXPATHLEN + MAXNAMELEN + 20];
@@ -3372,11 +4759,12 @@ ztest_verify_blocks(char *pool)
isa = strdup(isa);
/* LINTED */
(void) sprintf(bin,
- "/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s",
+ "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s",
isalen,
isa,
zopt_verbose >= 3 ? "s" : "",
zopt_verbose >= 4 ? "v" : "",
+ spa_config_path,
pool);
free(isa);
@@ -3423,7 +4811,6 @@ ztest_spa_import_export(char *oldname, char *newname)
nvlist_t *config, *newconfig;
uint64_t pool_guid;
spa_t *spa;
- int error;
if (zopt_verbose >= 4) {
(void) printf("import/export: old = %s, new = %s\n",
@@ -3438,15 +4825,13 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Get the pool's configuration and guid.
*/
- error = spa_open(oldname, &spa, FTAG);
- if (error)
- fatal(0, "spa_open('%s') = %d", oldname, error);
+ VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
/*
* Kick off a scrub to tickle scrub/export races.
*/
if (ztest_random(2) == 0)
- (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
+ (void) spa_scan(spa, POOL_SCAN_SCRUB);
pool_guid = spa_guid(spa);
spa_close(spa, FTAG);
@@ -3456,9 +4841,7 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Export it.
*/
- error = spa_export(oldname, &config, B_FALSE, B_FALSE);
- if (error)
- fatal(0, "spa_export('%s') = %d", oldname, error);
+ VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE));
ztest_walk_pool_directory("pools after export");
@@ -3472,39 +4855,29 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Import it under the new name.
*/
- error = spa_import(newname, config, NULL);
- if (error)
- fatal(0, "spa_import('%s') = %d", newname, error);
+ VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
ztest_walk_pool_directory("pools after import");
/*
* Try to import it again -- should fail with EEXIST.
*/
- error = spa_import(newname, config, NULL);
- if (error != EEXIST)
- fatal(0, "spa_import('%s') twice", newname);
+ VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0));
/*
* Try to import it under a different name -- should fail with EEXIST.
*/
- error = spa_import(oldname, config, NULL);
- if (error != EEXIST)
- fatal(0, "spa_import('%s') under multiple names", newname);
+ VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0));
/*
* Verify that the pool is no longer visible under the old name.
*/
- error = spa_open(oldname, &spa, FTAG);
- if (error != ENOENT)
- fatal(0, "spa_open('%s') = %d", newname, error);
+ VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
/*
* Verify that we can open and close the pool using the new name.
*/
- error = spa_open(newname, &spa, FTAG);
- if (error)
- fatal(0, "spa_open('%s') = %d", newname, error);
+ VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
ASSERT(pool_guid == spa_guid(spa));
spa_close(spa, FTAG);
@@ -3514,12 +4887,12 @@ ztest_spa_import_export(char *oldname, char *newname)
static void
ztest_resume(spa_t *spa)
{
- if (spa_suspended(spa)) {
- spa_vdev_state_enter(spa);
- vdev_clear(spa, NULL);
- (void) spa_vdev_state_exit(spa, NULL, 0);
- (void) zio_resume(spa);
- }
+ if (spa_suspended(spa) && zopt_verbose >= 6)
+ (void) printf("resuming from suspended state\n");
+ spa_vdev_state_enter(spa, SCL_NONE);
+ vdev_clear(spa, NULL);
+ (void) spa_vdev_state_exit(spa, NULL, 0);
+ (void) zio_resume(spa);
}
static void *
@@ -3528,155 +4901,252 @@ ztest_resume_thread(void *arg)
spa_t *spa = arg;
while (!ztest_exiting) {
- (void) poll(NULL, 0, 1000);
- ztest_resume(spa);
+ if (spa_suspended(spa))
+ ztest_resume(spa);
+ (void) poll(NULL, 0, 100);
}
return (NULL);
}
static void *
+ztest_deadman_thread(void *arg)
+{
+ ztest_shared_t *zs = arg;
+ int grace = 300;
+ hrtime_t delta;
+
+ delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
+
+ (void) poll(NULL, 0, (int)(1000 * delta));
+
+ fatal(0, "failed to complete within %d seconds of deadline", grace);
+
+ return (NULL);
+}
+
+static void
+ztest_execute(ztest_info_t *zi, uint64_t id)
+{
+ ztest_shared_t *zs = ztest_shared;
+ ztest_ds_t *zd = &zs->zs_zd[id % zopt_datasets];
+ hrtime_t functime = gethrtime();
+
+ for (int i = 0; i < zi->zi_iters; i++)
+ zi->zi_func(zd, id);
+
+ functime = gethrtime() - functime;
+
+ atomic_add_64(&zi->zi_call_count, 1);
+ atomic_add_64(&zi->zi_call_time, functime);
+
+ if (zopt_verbose >= 4) {
+ Dl_info dli;
+ (void) dladdr((void *)zi->zi_func, &dli);
+ (void) printf("%6.2f sec in %s\n",
+ (double)functime / NANOSEC, dli.dli_sname);
+ }
+}
+
+static void *
ztest_thread(void *arg)
{
- ztest_args_t *za = arg;
+ uint64_t id = (uintptr_t)arg;
ztest_shared_t *zs = ztest_shared;
- hrtime_t now, functime;
+ uint64_t call_next;
+ hrtime_t now;
ztest_info_t *zi;
- int f, i;
- while ((now = gethrtime()) < za->za_stop) {
+ while ((now = gethrtime()) < zs->zs_thread_stop) {
/*
* See if it's time to force a crash.
*/
- if (now > za->za_kill) {
- zs->zs_alloc = spa_get_alloc(za->za_spa);
- zs->zs_space = spa_get_space(za->za_spa);
- (void) kill(getpid(), SIGKILL);
- }
+ if (now > zs->zs_thread_kill)
+ ztest_kill(zs);
/*
- * Pick a random function.
+ * If we're getting ENOSPC with some regularity, stop.
*/
- f = ztest_random(ZTEST_FUNCS);
- zi = &zs->zs_info[f];
+ if (zs->zs_enospc_count > 10)
+ break;
/*
- * Decide whether to call it, based on the requested frequency.
+ * Pick a random function to execute.
*/
- if (zi->zi_call_target == 0 ||
- (double)zi->zi_call_total / zi->zi_call_target >
- (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC))
- continue;
+ zi = &zs->zs_info[ztest_random(ZTEST_FUNCS)];
+ call_next = zi->zi_call_next;
- atomic_add_64(&zi->zi_calls, 1);
- atomic_add_64(&zi->zi_call_total, 1);
+ if (now >= call_next &&
+ atomic_cas_64(&zi->zi_call_next, call_next, call_next +
+ ztest_random(2 * zi->zi_interval[0] + 1)) == call_next)
+ ztest_execute(zi, id);
+ }
- za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) *
- ZTEST_DIRSIZE;
- za->za_diroff_shared = (1ULL << 63);
+ return (NULL);
+}
- for (i = 0; i < zi->zi_iters; i++)
- zi->zi_func(za);
+static void
+ztest_dataset_name(char *dsname, char *pool, int d)
+{
+ (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d);
+}
- functime = gethrtime() - now;
+static void
+ztest_dataset_destroy(ztest_shared_t *zs, int d)
+{
+ char name[MAXNAMELEN];
- atomic_add_64(&zi->zi_call_time, functime);
+ ztest_dataset_name(name, zs->zs_pool, d);
- if (zopt_verbose >= 4) {
- Dl_info dli;
- (void) dladdr((void *)zi->zi_func, &dli);
- (void) printf("%6.2f sec in %s\n",
- (double)functime / NANOSEC, dli.dli_sname);
- }
+ if (zopt_verbose >= 3)
+ (void) printf("Destroying %s to free up space\n", name);
- /*
- * If we're getting ENOSPC with some regularity, stop.
- */
- if (zs->zs_enospc_count > 10)
- break;
+ /*
+ * Clean up any non-standard clones and snapshots. In general,
+ * ztest thread t operates on dataset (t % zopt_datasets),
+ * so there may be more than one thing to clean up.
+ */
+ for (int t = d; t < zopt_threads; t += zopt_datasets)
+ ztest_dsl_dataset_cleanup(name, t);
+
+ (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
+ DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+}
+
+static void
+ztest_dataset_dirobj_verify(ztest_ds_t *zd)
+{
+ uint64_t usedobjs, dirobjs, scratch;
+
+ /*
+ * ZTEST_DIROBJ is the object directory for the entire dataset.
+ * Therefore, the number of objects in use should equal the
+ * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself.
+ * If not, we have an object leak.
+ *
+ * Note that we can only check this in ztest_dataset_open(),
+ * when the open-context and syncing-context values agree.
+ * That's because zap_count() returns the open-context value,
+ * while dmu_objset_space() returns the rootbp fill count.
+ */
+ VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
+ dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
+ ASSERT3U(dirobjs + 1, ==, usedobjs);
+}
+
+static int
+ztest_dataset_open(ztest_shared_t *zs, int d)
+{
+ ztest_ds_t *zd = &zs->zs_zd[d];
+ uint64_t committed_seq = zd->zd_seq;
+ objset_t *os;
+ zilog_t *zilog;
+ char name[MAXNAMELEN];
+ int error;
+
+ ztest_dataset_name(name, zs->zs_pool, d);
+
+ (void) rw_rdlock(&zs->zs_name_lock);
+
+ error = ztest_dataset_create(name);
+ if (error == ENOSPC) {
+ (void) rw_unlock(&zs->zs_name_lock);
+ ztest_record_enospc(FTAG);
+ return (error);
}
+ ASSERT(error == 0 || error == EEXIST);
- return (NULL);
+ VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0);
+ (void) rw_unlock(&zs->zs_name_lock);
+
+ ztest_zd_init(zd, os);
+
+ zilog = zd->zd_zilog;
+
+ if (zilog->zl_header->zh_claim_lr_seq != 0 &&
+ zilog->zl_header->zh_claim_lr_seq < committed_seq)
+ fatal(0, "missing log records: claimed %llu < committed %llu",
+ zilog->zl_header->zh_claim_lr_seq, committed_seq);
+
+ ztest_dataset_dirobj_verify(zd);
+
+ zil_replay(os, zd, ztest_replay_vector);
+
+ ztest_dataset_dirobj_verify(zd);
+
+ if (zopt_verbose >= 6)
+ (void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
+ zd->zd_name,
+ (u_longlong_t)zilog->zl_parse_blk_count,
+ (u_longlong_t)zilog->zl_parse_lr_count,
+ (u_longlong_t)zilog->zl_replaying_seq);
+
+ zilog = zil_open(os, ztest_get_data);
+
+ if (zilog->zl_replaying_seq != 0 &&
+ zilog->zl_replaying_seq < committed_seq)
+ fatal(0, "missing log records: replayed %llu < committed %llu",
+ zilog->zl_replaying_seq, committed_seq);
+
+ return (0);
+}
+
+static void
+ztest_dataset_close(ztest_shared_t *zs, int d)
+{
+ ztest_ds_t *zd = &zs->zs_zd[d];
+
+ zil_close(zd->zd_zilog);
+ dmu_objset_rele(zd->zd_os, zd);
+
+ ztest_zd_fini(zd);
}
/*
* Kick off threads to run tests on all datasets in parallel.
*/
static void
-ztest_run(char *pool)
+ztest_run(ztest_shared_t *zs)
{
- int t, d, error;
- ztest_shared_t *zs = ztest_shared;
- ztest_args_t *za;
+ thread_t *tid;
spa_t *spa;
- char name[100];
thread_t resume_tid;
+ int error;
ztest_exiting = B_FALSE;
- (void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL);
- (void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL);
-
- for (t = 0; t < ZTEST_SYNC_LOCKS; t++)
- (void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL);
-
- /*
- * Destroy one disk before we even start.
- * It's mirrored, so everything should work just fine.
- * This makes us exercise fault handling very early in spa_load().
- */
- ztest_obliterate_one_disk(0);
-
- /*
- * Verify that the sum of the sizes of all blocks in the pool
- * equals the SPA's allocated space total.
- */
- ztest_verify_blocks(pool);
-
/*
- * Kick off a replacement of the disk we just obliterated.
+ * Initialize parent/child shared state.
*/
- kernel_init(FREAD | FWRITE);
- VERIFY(spa_open(pool, &spa, FTAG) == 0);
- ztest_replace_one_disk(spa, 0);
- if (zopt_verbose >= 5)
- show_pool_stats(spa);
- spa_close(spa, FTAG);
- kernel_fini();
+ VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0);
+ VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0);
- kernel_init(FREAD | FWRITE);
+ zs->zs_thread_start = gethrtime();
+ zs->zs_thread_stop = zs->zs_thread_start + zopt_passtime * NANOSEC;
+ zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
+ zs->zs_thread_kill = zs->zs_thread_stop;
+ if (ztest_random(100) < zopt_killrate)
+ zs->zs_thread_kill -= ztest_random(zopt_passtime * NANOSEC);
- /*
- * Verify that we can export the pool and reimport it under a
- * different name.
- */
- if (ztest_random(2) == 0) {
- (void) snprintf(name, 100, "%s_import", pool);
- ztest_spa_import_export(pool, name);
- ztest_spa_import_export(name, pool);
- }
+ (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL);
- /*
- * Verify that we can loop over all pools.
- */
- mutex_enter(&spa_namespace_lock);
- for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) {
- if (zopt_verbose > 3) {
- (void) printf("spa_next: found %s\n", spa_name(spa));
- }
- }
- mutex_exit(&spa_namespace_lock);
+ list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
+ offsetof(ztest_cb_data_t, zcd_node));
/*
* Open our pool.
*/
- VERIFY(spa_open(pool, &spa, FTAG) == 0);
+ kernel_init(FREAD | FWRITE);
+ VERIFY(spa_open(zs->zs_pool, &spa, FTAG) == 0);
+ zs->zs_spa = spa;
+
+ spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
/*
* We don't expect the pool to suspend unless maxfaults == 0,
* in which case ztest_fault_inject() temporarily takes away
* the only valid replica.
*/
- if (zopt_maxfaults == 0)
+ if (MAXFAULTS() == 0)
spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
else
spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
@@ -3688,13 +5158,19 @@ ztest_run(char *pool)
&resume_tid) == 0);
/*
+ * Create a deadman thread to abort() if we hang.
+ */
+ VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND,
+ NULL) == 0);
+
+ /*
* Verify that we can safely inquire about about any object,
* whether it's allocated or not. To make it interesting,
* we probe a 5-wide window around each power of two.
* This hits all edge cases, including zero and the max.
*/
- for (t = 0; t < 64; t++) {
- for (d = -5; d <= 5; d++) {
+ for (int t = 0; t < 64; t++) {
+ for (int d = -5; d <= 5; d++) {
error = dmu_object_info(spa->spa_meta_objset,
(1ULL << t) + d, NULL);
ASSERT(error == 0 || error == ENOENT ||
@@ -3703,101 +5179,45 @@ ztest_run(char *pool)
}
/*
- * Now kick off all the tests that run in parallel.
+ * If we got any ENOSPC errors on the previous run, destroy something.
*/
+ if (zs->zs_enospc_count != 0) {
+ int d = ztest_random(zopt_datasets);
+ ztest_dataset_destroy(zs, d);
+ }
zs->zs_enospc_count = 0;
- za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL);
+ tid = umem_zalloc(zopt_threads * sizeof (thread_t), UMEM_NOFAIL);
if (zopt_verbose >= 4)
(void) printf("starting main threads...\n");
- za[0].za_start = gethrtime();
- za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC;
- za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time);
- za[0].za_kill = za[0].za_stop;
- if (ztest_random(100) < zopt_killrate)
- za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC);
-
- for (t = 0; t < zopt_threads; t++) {
- d = t % zopt_datasets;
-
- (void) strcpy(za[t].za_pool, pool);
- za[t].za_os = za[d].za_os;
- za[t].za_spa = spa;
- za[t].za_zilog = za[d].za_zilog;
- za[t].za_instance = t;
- za[t].za_random = ztest_random(-1ULL);
- za[t].za_start = za[0].za_start;
- za[t].za_stop = za[0].za_stop;
- za[t].za_kill = za[0].za_kill;
-
- if (t < zopt_datasets) {
- int test_future = FALSE;
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
- ztest_create_cb, NULL);
- if (error == EEXIST) {
- test_future = TRUE;
- } else if (error == ENOSPC) {
- zs->zs_enospc_count++;
- (void) rw_unlock(&ztest_shared->zs_name_lock);
- break;
- } else if (error != 0) {
- fatal(0, "dmu_objset_create(%s) = %d",
- name, error);
- }
- error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_USER, &za[d].za_os);
- if (error)
- fatal(0, "dmu_objset_open('%s') = %d",
- name, error);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
- if (test_future)
- ztest_dmu_check_future_leak(&za[t]);
- zil_replay(za[d].za_os, za[d].za_os,
- ztest_replay_vector);
- za[d].za_zilog = zil_open(za[d].za_os, NULL);
- }
-
- VERIFY(thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
- &za[t].za_thread) == 0);
- }
-
- while (--t >= 0) {
- VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
- if (t < zopt_datasets) {
- zil_close(za[t].za_zilog);
- dmu_objset_close(za[t].za_os);
- }
+ /*
+ * Kick off all the tests that run in parallel.
+ */
+ for (int t = 0; t < zopt_threads; t++) {
+ if (t < zopt_datasets && ztest_dataset_open(zs, t) != 0)
+ return;
+ VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
+ THR_BOUND, &tid[t]) == 0);
}
- if (zopt_verbose >= 3)
- show_pool_stats(spa);
-
- txg_wait_synced(spa_get_dsl(spa), 0);
-
- zs->zs_alloc = spa_get_alloc(spa);
- zs->zs_space = spa_get_space(spa);
-
/*
- * If we had out-of-space errors, destroy a random objset.
+ * Wait for all of the tests to complete. We go in reverse order
+ * so we don't close datasets while threads are still using them.
*/
- if (zs->zs_enospc_count != 0) {
- (void) rw_rdlock(&ztest_shared->zs_name_lock);
- d = (int)ztest_random(zopt_datasets);
- (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
- if (zopt_verbose >= 3)
- (void) printf("Destroying %s to free up space\n", name);
- (void) dmu_objset_find(name, ztest_destroy_cb, &za[d],
- DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
- (void) rw_unlock(&ztest_shared->zs_name_lock);
+ for (int t = zopt_threads - 1; t >= 0; t--) {
+ VERIFY(thr_join(tid[t], NULL, NULL) == 0);
+ if (t < zopt_datasets)
+ ztest_dataset_close(zs, t);
}
txg_wait_synced(spa_get_dsl(spa), 0);
- umem_free(za, zopt_threads * sizeof (ztest_args_t));
+ zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
+
+ umem_free(tid, zopt_threads * sizeof (thread_t));
/* Kill the resume thread */
ztest_exiting = B_TRUE;
@@ -3808,11 +5228,107 @@ ztest_run(char *pool)
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
- for (t = 1; t < 50; t++)
- dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15);
+ for (uint64_t object = 1; object < 50; object++)
+ dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
spa_close(spa, FTAG);
+ /*
+ * Verify that we can loop over all pools.
+ */
+ mutex_enter(&spa_namespace_lock);
+ for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
+ if (zopt_verbose > 3)
+ (void) printf("spa_next: found %s\n", spa_name(spa));
+ mutex_exit(&spa_namespace_lock);
+
+ /*
+ * Verify that we can export the pool and reimport it under a
+ * different name.
+ */
+ if (ztest_random(2) == 0) {
+ char name[MAXNAMELEN];
+ (void) snprintf(name, MAXNAMELEN, "%s_import", zs->zs_pool);
+ ztest_spa_import_export(zs->zs_pool, name);
+ ztest_spa_import_export(name, zs->zs_pool);
+ }
+
+ kernel_fini();
+
+ list_destroy(&zcl.zcl_callbacks);
+
+ (void) _mutex_destroy(&zcl.zcl_callbacks_lock);
+
+ (void) rwlock_destroy(&zs->zs_name_lock);
+ (void) _mutex_destroy(&zs->zs_vdev_lock);
+}
+
+static void
+ztest_freeze(ztest_shared_t *zs)
+{
+ ztest_ds_t *zd = &zs->zs_zd[0];
+ spa_t *spa;
+ int numloops = 0;
+
+ if (zopt_verbose >= 3)
+ (void) printf("testing spa_freeze()...\n");
+
+ kernel_init(FREAD | FWRITE);
+ VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
+ VERIFY3U(0, ==, ztest_dataset_open(zs, 0));
+
+ /*
+ * Force the first log block to be transactionally allocated.
+ * We have to do this before we freeze the pool -- otherwise
+ * the log chain won't be anchored.
+ */
+ while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
+ ztest_dmu_object_alloc_free(zd, 0);
+ zil_commit(zd->zd_zilog, 0);
+ }
+
+ txg_wait_synced(spa_get_dsl(spa), 0);
+
+ /*
+ * Freeze the pool. This stops spa_sync() from doing anything,
+ * so that the only way to record changes from now on is the ZIL.
+ */
+ spa_freeze(spa);
+
+ /*
+ * Run tests that generate log records but don't alter the pool config
+ * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
+ * We do a txg_wait_synced() after each iteration to force the txg
+ * to increase well beyond the last synced value in the uberblock.
+ * The ZIL should be OK with that.
+ */
+ while (ztest_random(10) != 0 && numloops++ < zopt_maxloops) {
+ ztest_dmu_write_parallel(zd, 0);
+ ztest_dmu_object_alloc_free(zd, 0);
+ txg_wait_synced(spa_get_dsl(spa), 0);
+ }
+
+ /*
+ * Commit all of the changes we just generated.
+ */
+ zil_commit(zd->zd_zilog, 0);
+ txg_wait_synced(spa_get_dsl(spa), 0);
+
+ /*
+ * Close our dataset and close the pool.
+ */
+ ztest_dataset_close(zs, 0);
+ spa_close(spa, FTAG);
+ kernel_fini();
+
+ /*
+ * Open and close the pool and dataset to induce log replay.
+ */
+ kernel_init(FREAD | FWRITE);
+ VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
+ VERIFY3U(0, ==, ztest_dataset_open(zs, 0));
+ ztest_dataset_close(zs, 0);
+ spa_close(spa, FTAG);
kernel_fini();
}
@@ -3841,43 +5357,65 @@ print_time(hrtime_t t, char *timebuf)
(void) sprintf(timebuf, "%llus", s);
}
+static nvlist_t *
+make_random_props()
+{
+ nvlist_t *props;
+
+ if (ztest_random(2) == 0)
+ return (NULL);
+
+ VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+ VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
+
+ (void) printf("props:\n");
+ dump_nvlist(props, 4);
+
+ return (props);
+}
+
/*
* Create a storage pool with the given name and initial vdev size.
- * Then create the specified number of datasets in the pool.
+ * Then test spa_freeze() functionality.
*/
static void
-ztest_init(char *pool)
+ztest_init(ztest_shared_t *zs)
{
spa_t *spa;
- int error;
- nvlist_t *nvroot;
+ nvlist_t *nvroot, *props;
+
+ VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0);
+ VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0);
kernel_init(FREAD | FWRITE);
/*
* Create the storage pool.
*/
- (void) spa_destroy(pool);
- ztest_shared->zs_vdev_primaries = 0;
+ (void) spa_destroy(zs->zs_pool);
+ ztest_shared->zs_vdev_next_leaf = 0;
+ zs->zs_splits = 0;
+ zs->zs_mirrors = zopt_mirrors;
nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
- 0, zopt_raidz, zopt_mirrors, 1);
- error = spa_create(pool, nvroot, NULL, NULL, NULL);
+ 0, zopt_raidz, zs->zs_mirrors, 1);
+ props = make_random_props();
+ VERIFY3U(0, ==, spa_create(zs->zs_pool, nvroot, props, NULL, NULL));
nvlist_free(nvroot);
- if (error)
- fatal(0, "spa_create() = %d", error);
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
-
+ VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
-
- if (zopt_verbose >= 3)
- show_pool_stats(spa);
-
spa_close(spa, FTAG);
kernel_fini();
+
+ ztest_run_zdb(zs->zs_pool);
+
+ ztest_freeze(zs);
+
+ ztest_run_zdb(zs->zs_pool);
+
+ (void) rwlock_destroy(&zs->zs_name_lock);
+ (void) _mutex_destroy(&zs->zs_vdev_lock);
}
int
@@ -3885,29 +5423,32 @@ main(int argc, char **argv)
{
int kills = 0;
int iters = 0;
- int i, f;
ztest_shared_t *zs;
+ size_t shared_size;
ztest_info_t *zi;
char timebuf[100];
char numbuf[6];
+ spa_t *spa;
(void) setvbuf(stdout, NULL, _IOLBF, 0);
- /* Override location of zpool.cache */
- spa_config_path = "/tmp/zpool.cache";
-
ztest_random_fd = open("/dev/urandom", O_RDONLY);
process_options(argc, argv);
+ /* Override location of zpool.cache */
+ (void) asprintf((char **)&spa_config_path, "%s/zpool.cache", zopt_dir);
+
/*
* Blow away any existing copy of zpool.cache
*/
if (zopt_init != 0)
- (void) remove("/tmp/zpool.cache");
+ (void) remove(spa_config_path);
+
+ shared_size = sizeof (*zs) + zopt_datasets * sizeof (ztest_ds_t);
zs = ztest_shared = (void *)mmap(0,
- P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()),
+ P2ROUNDUP(shared_size, getpagesize()),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
if (zopt_verbose >= 1) {
@@ -3920,46 +5461,43 @@ main(int argc, char **argv)
/*
* Create and initialize our storage pool.
*/
- for (i = 1; i <= zopt_init; i++) {
+ for (int i = 1; i <= zopt_init; i++) {
bzero(zs, sizeof (ztest_shared_t));
if (zopt_verbose >= 3 && zopt_init != 1)
(void) printf("ztest_init(), pass %d\n", i);
- ztest_init(zopt_pool);
+ zs->zs_pool = zopt_pool;
+ ztest_init(zs);
}
- /*
- * Initialize the call targets for each function.
- */
- for (f = 0; f < ZTEST_FUNCS; f++) {
- zi = &zs->zs_info[f];
+ zs->zs_pool = zopt_pool;
+ zs->zs_proc_start = gethrtime();
+ zs->zs_proc_stop = zs->zs_proc_start + zopt_time * NANOSEC;
+ for (int f = 0; f < ZTEST_FUNCS; f++) {
+ zi = &zs->zs_info[f];
*zi = ztest_info[f];
-
- if (*zi->zi_interval == 0)
- zi->zi_call_target = UINT64_MAX;
+ if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop)
+ zi->zi_call_next = UINT64_MAX;
else
- zi->zi_call_target = zopt_time / *zi->zi_interval;
+ zi->zi_call_next = zs->zs_proc_start +
+ ztest_random(2 * zi->zi_interval[0] + 1);
}
- zs->zs_start_time = gethrtime();
- zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC;
-
/*
* Run the tests in a loop. These tests include fault injection
* to verify that self-healing data works, and forced crashes
* to verify that we never lose on-disk consistency.
*/
- while (gethrtime() < zs->zs_stop_time) {
+ while (gethrtime() < zs->zs_proc_stop) {
int status;
pid_t pid;
- char *tmp;
/*
* Initialize the workload counters for each function.
*/
- for (f = 0; f < ZTEST_FUNCS; f++) {
+ for (int f = 0; f < ZTEST_FUNCS; f++) {
zi = &zs->zs_info[f];
- zi->zi_calls = 0;
+ zi->zi_call_count = 0;
zi->zi_call_time = 0;
}
@@ -3975,7 +5513,7 @@ main(int argc, char **argv)
struct rlimit rl = { 1024, 1024 };
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
- ztest_run(zopt_pool);
+ ztest_run(zs);
exit(0);
}
@@ -4008,8 +5546,8 @@ main(int argc, char **argv)
if (zopt_verbose >= 1) {
hrtime_t now = gethrtime();
- now = MIN(now, zs->zs_stop_time);
- print_time(zs->zs_stop_time - now, timebuf);
+ now = MIN(now, zs->zs_proc_stop);
+ print_time(zs->zs_proc_stop - now, timebuf);
nicenum(zs->zs_space, numbuf);
(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
@@ -4019,7 +5557,7 @@ main(int argc, char **argv)
(u_longlong_t)zs->zs_enospc_count,
100.0 * zs->zs_alloc / zs->zs_space,
numbuf,
- 100.0 * (now - zs->zs_start_time) /
+ 100.0 * (now - zs->zs_proc_start) /
(zopt_time * NANOSEC), timebuf);
}
@@ -4029,34 +5567,39 @@ main(int argc, char **argv)
"Calls", "Time", "Function");
(void) printf("%7s %9s %s\n",
"-----", "----", "--------");
- for (f = 0; f < ZTEST_FUNCS; f++) {
+ for (int f = 0; f < ZTEST_FUNCS; f++) {
Dl_info dli;
zi = &zs->zs_info[f];
print_time(zi->zi_call_time, timebuf);
(void) dladdr((void *)zi->zi_func, &dli);
(void) printf("%7llu %9s %s\n",
- (u_longlong_t)zi->zi_calls, timebuf,
+ (u_longlong_t)zi->zi_call_count, timebuf,
dli.dli_sname);
}
(void) printf("\n");
}
/*
- * It's possible that we killed a child during a rename test, in
- * which case we'll have a 'ztest_tmp' pool lying around instead
- * of 'ztest'. Do a blind rename in case this happened.
+ * It's possible that we killed a child during a rename test,
+ * in which case we'll have a 'ztest_tmp' pool lying around
+ * instead of 'ztest'. Do a blind rename in case this happened.
*/
- tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL);
- (void) strcpy(tmp, zopt_pool);
- (void) strcat(tmp, "_tmp");
- kernel_init(FREAD | FWRITE);
- (void) spa_rename(tmp, zopt_pool);
+ kernel_init(FREAD);
+ if (spa_open(zopt_pool, &spa, FTAG) == 0) {
+ spa_close(spa, FTAG);
+ } else {
+ char tmpname[MAXNAMELEN];
+ kernel_fini();
+ kernel_init(FREAD | FWRITE);
+ (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp",
+ zopt_pool);
+ (void) spa_rename(tmpname, zopt_pool);
+ }
kernel_fini();
- umem_free(tmp, strlen(tmp) + 1);
- }
- ztest_verify_blocks(zopt_pool);
+ ztest_run_zdb(zopt_pool);
+ }
if (zopt_verbose >= 1) {
(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
diff --git a/cddl/contrib/opensolaris/head/synch.h b/cddl/contrib/opensolaris/head/synch.h
index eab9de8..89efe9c 100644
--- a/cddl/contrib/opensolaris/head/synch.h
+++ b/cddl/contrib/opensolaris/head/synch.h
@@ -20,15 +20,12 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYNCH_H
#define _SYNCH_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* synch.h:
* definitions needed to use the thread synchronization interface
@@ -243,10 +240,17 @@ int sema_trywait();
#ifdef __STDC__
-int _sema_held(sema_t *);
-int _rw_read_held(rwlock_t *);
-int _rw_write_held(rwlock_t *);
-int _mutex_held(mutex_t *);
+/*
+ * The *_held() functions apply equally well to Solaris threads
+ * and to Posix threads synchronization objects, but the formal
+ * type declarations are different, so we just declare the argument
+ * to each *_held() function to be a void *, expecting that they will
+ * be called with the proper type of argument in each case.
+ */
+int _sema_held(void *); /* sema_t or sem_t */
+int _rw_read_held(void *); /* rwlock_t or pthread_rwlock_t */
+int _rw_write_held(void *); /* rwlock_t or pthread_rwlock_t */
+int _mutex_held(void *); /* mutex_t or pthread_mutex_t */
#else /* __STDC__ */
@@ -257,6 +261,13 @@ int _mutex_held();
#endif /* __STDC__ */
+/* Pause API */
+#ifdef __STDC__
+void smt_pause(void);
+#else /* __STDC__ */
+void smt_pause();
+#endif /* __STDC__ */
+
#endif /* _ASM */
#ifdef __cplusplus
diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
index 89e01dd..1425748 100644
--- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
+++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.c
@@ -19,15 +19,15 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
+#include <solaris.h>
#include <inttypes.h>
#include <unistd.h>
#include <strings.h>
+#include <libintl.h>
+#include <stdarg.h>
#include "libnvpair.h"
/*
@@ -38,21 +38,531 @@
* between kernel and userland, and possibly saving onto disk files.
*/
+/*
+ * Print control structure.
+ */
+
+#define DEFINEOP(opname, vtype) \
+ struct { \
+ int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \
+ const char *, vtype); \
+ void *arg; \
+ } opname
+
+#define DEFINEARROP(opname, vtype) \
+ struct { \
+ int (*op)(struct nvlist_prtctl *, void *, nvlist_t *, \
+ const char *, vtype, uint_t); \
+ void *arg; \
+ } opname
+
+struct nvlist_printops {
+ DEFINEOP(print_boolean, int);
+ DEFINEOP(print_boolean_value, boolean_t);
+ DEFINEOP(print_byte, uchar_t);
+ DEFINEOP(print_int8, int8_t);
+ DEFINEOP(print_uint8, uint8_t);
+ DEFINEOP(print_int16, int16_t);
+ DEFINEOP(print_uint16, uint16_t);
+ DEFINEOP(print_int32, int32_t);
+ DEFINEOP(print_uint32, uint32_t);
+ DEFINEOP(print_int64, int64_t);
+ DEFINEOP(print_uint64, uint64_t);
+ DEFINEOP(print_double, double);
+ DEFINEOP(print_string, char *);
+ DEFINEOP(print_hrtime, hrtime_t);
+ DEFINEOP(print_nvlist, nvlist_t *);
+ DEFINEARROP(print_boolean_array, boolean_t *);
+ DEFINEARROP(print_byte_array, uchar_t *);
+ DEFINEARROP(print_int8_array, int8_t *);
+ DEFINEARROP(print_uint8_array, uint8_t *);
+ DEFINEARROP(print_int16_array, int16_t *);
+ DEFINEARROP(print_uint16_array, uint16_t *);
+ DEFINEARROP(print_int32_array, int32_t *);
+ DEFINEARROP(print_uint32_array, uint32_t *);
+ DEFINEARROP(print_int64_array, int64_t *);
+ DEFINEARROP(print_uint64_array, uint64_t *);
+ DEFINEARROP(print_string_array, char **);
+ DEFINEARROP(print_nvlist_array, nvlist_t **);
+};
+
+/*
+ * Print control state: output stream, indentation settings, format
+ * strings and the default/custom rendering tables consulted by
+ * RENDER()/ARENDER().
+ */
+struct nvlist_prtctl {
+ FILE *nvprt_fp; /* output destination */
+ enum nvlist_indent_mode nvprt_indent_mode; /* NVLIST_INDENT_ABS or _TABBED */
+ int nvprt_indent; /* absolute indent, or tab depth */
+ int nvprt_indentinc; /* indent or tab increment */
+ const char *nvprt_nmfmt; /* member name format, max one %s */
+ const char *nvprt_eomfmt; /* after member format, e.g. "\n" */
+ const char *nvprt_btwnarrfmt; /* between array members */
+ int nvprt_btwnarrfmt_nl; /* nvprt_btwnarrfmt includes newline? */
+ struct nvlist_printops *nvprt_dfltops; /* built-in renderers */
+ struct nvlist_printops *nvprt_custops; /* overrides; may be NULL */
+};
+
+#define DFLTPRTOP(pctl, type) \
+ ((pctl)->nvprt_dfltops->print_##type.op)
+
+#define DFLTPRTOPARG(pctl, type) \
+ ((pctl)->nvprt_dfltops->print_##type.arg)
+
+#define CUSTPRTOP(pctl, type) \
+ ((pctl)->nvprt_custops->print_##type.op)
+
+#define CUSTPRTOPARG(pctl, type) \
+ ((pctl)->nvprt_custops->print_##type.arg)
+
+/*
+ * Render one single-valued member.  A custom op, when one is installed
+ * for this type, is tried first; if it is absent or returns 0 the default
+ * op is used.  The end-of-member format is emitted afterwards in either
+ * case.  Wrapped in do { } while (0) so the macro behaves as a single
+ * statement wherever it is followed by a semicolon.
+ */
+#define RENDER(pctl, type, nvl, name, val) \
+	do { \
+		int done = 0; \
+		if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \
+			done = CUSTPRTOP(pctl, type)(pctl, \
+			    CUSTPRTOPARG(pctl, type), nvl, name, val); \
+		} \
+		if (!done) { \
+			(void) DFLTPRTOP(pctl, type)(pctl, \
+			    DFLTPRTOPARG(pctl, type), nvl, name, val); \
+		} \
+		(void) fprintf((pctl)->nvprt_fp, (pctl)->nvprt_eomfmt); \
+	} while (0)
+
+/*
+ * Render one array-valued member of 'count' elements.  A custom op, when
+ * one is installed for this type, is tried first; if it is absent or
+ * returns 0 the default op is used.  The end-of-member format is emitted
+ * afterwards in either case.  Wrapped in do { } while (0) so the macro
+ * behaves as a single statement wherever it is followed by a semicolon.
+ */
+#define ARENDER(pctl, type, nvl, name, arrp, count) \
+	do { \
+		int done = 0; \
+		if ((pctl)->nvprt_custops && CUSTPRTOP(pctl, type)) { \
+			done = CUSTPRTOP(pctl, type)(pctl, \
+			    CUSTPRTOPARG(pctl, type), nvl, name, arrp, count); \
+		} \
+		if (!done) { \
+			(void) DFLTPRTOP(pctl, type)(pctl, \
+			    DFLTPRTOPARG(pctl, type), nvl, name, arrp, count); \
+		} \
+		(void) fprintf((pctl)->nvprt_fp, (pctl)->nvprt_eomfmt); \
+	} while (0)
+
+static void nvlist_print_with_indent(nvlist_t *, nvlist_prtctl_t);
+
+/*
+ * ======================================================================
+ * | |
+ * | Indentation |
+ * | |
+ * ======================================================================
+ */
+
static void
-indent(FILE *fp, int depth)
+indent(nvlist_prtctl_t pctl, int onemore)
{
- while (depth-- > 0)
- (void) fprintf(fp, "\t");
+ int depth;
+
+ switch (pctl->nvprt_indent_mode) {
+ case NVLIST_INDENT_ABS:
+ (void) fprintf(pctl->nvprt_fp, "%*s",
+ pctl->nvprt_indent + onemore * pctl->nvprt_indentinc, "");
+ break;
+
+ case NVLIST_INDENT_TABBED:
+ depth = pctl->nvprt_indent + onemore;
+ while (depth-- > 0)
+ (void) fprintf(pctl->nvprt_fp, "\t");
+ }
}
/*
- * nvlist_print - Prints elements in an event buffer
+ * ======================================================================
+ * | |
+ * | Default nvlist member rendering functions. |
+ * | |
+ * ======================================================================
+ */
+
+/*
+ * Generate functions to print single-valued nvlist members.
+ *
+ * type_and_variant - suffix to form function name
+ * vtype - C type for the member value
+ * ptype - C type to cast value to for printing
+ * vfmt - format string for pair value, e.g "%d" or "0x%llx"
+ */
+
+#define NVLIST_PRTFUNC(type_and_variant, vtype, ptype, vfmt) \
+static int \
+nvprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \
+ nvlist_t *nvl, const char *name, vtype value) \
+{ \
+ FILE *fp = pctl->nvprt_fp; \
+ NOTE(ARGUNUSED(private)) \
+ NOTE(ARGUNUSED(nvl)) \
+ indent(pctl, 1); \
+ (void) fprintf(fp, pctl->nvprt_nmfmt, name); \
+ (void) fprintf(fp, vfmt, (ptype)value); \
+ return (1); \
+}
+
+/*
+ * Default single-value renderers, one per scalar nvpair type.
+ * Arguments: name suffix, value type, type the value is cast to for
+ * printing, and the printf format for that cast type.
+ *
+ * The double renderer uses "%f": the "ll" length modifier is only defined
+ * for integer conversions, so "%llf" is undefined behaviour in ISO C.
+ * The historical "0x" prefix is retained so existing output is unchanged.
+ */
+NVLIST_PRTFUNC(boolean, int, int, "%d")
+NVLIST_PRTFUNC(boolean_value, boolean_t, int, "%d")
+NVLIST_PRTFUNC(byte, uchar_t, uchar_t, "0x%2.2x")
+NVLIST_PRTFUNC(int8, int8_t, int, "%d")
+NVLIST_PRTFUNC(uint8, uint8_t, uint8_t, "0x%x")
+NVLIST_PRTFUNC(int16, int16_t, int16_t, "%d")
+NVLIST_PRTFUNC(uint16, uint16_t, uint16_t, "0x%x")
+NVLIST_PRTFUNC(int32, int32_t, int32_t, "%d")
+NVLIST_PRTFUNC(uint32, uint32_t, uint32_t, "0x%x")
+NVLIST_PRTFUNC(int64, int64_t, longlong_t, "%lld")
+NVLIST_PRTFUNC(uint64, uint64_t, u_longlong_t, "0x%llx")
+NVLIST_PRTFUNC(double, double, double, "0x%f")
+NVLIST_PRTFUNC(string, char *, char *, "%s")
+NVLIST_PRTFUNC(hrtime, hrtime_t, hrtime_t, "0x%llx")
+
+/*
+ * Generate functions to print array-valued nvlist members.
+ *
+ * type_and_variant - suffix to form function name
+ * vtype - C type of one array element
+ * ptype - C type to cast each element to for printing
+ * vfmt - format string for one element, e.g "%d" or "0x%llx"
+ *
+ * The member name is printed before the first element.  When the
+ * between-elements format contains a newline (nvprt_btwnarrfmt_nl), the
+ * indented name is repeated for every element together with an "[index]: "
+ * tag.  The generated function always returns 1.
+ */
+
+#define NVLIST_ARRPRTFUNC(type_and_variant, vtype, ptype, vfmt) \
+static int \
+nvaprint_##type_and_variant(nvlist_prtctl_t pctl, void *private, \
+    nvlist_t *nvl, const char *name, vtype *valuep, uint_t count) \
+{ \
+	FILE *fp = pctl->nvprt_fp; \
+	uint_t i; \
+	NOTE(ARGUNUSED(private)) \
+	NOTE(ARGUNUSED(nvl)) \
+	for (i = 0; i < count; i++) { \
+		if (i == 0 || pctl->nvprt_btwnarrfmt_nl) { \
+			indent(pctl, 1); \
+			(void) fprintf(fp, pctl->nvprt_nmfmt, name); \
+			/* i is unsigned, so print it with %u */ \
+			if (pctl->nvprt_btwnarrfmt_nl) \
+				(void) fprintf(fp, "[%u]: ", i); \
+		} \
+		if (i != 0) \
+			(void) fprintf(fp, pctl->nvprt_btwnarrfmt); \
+		(void) fprintf(fp, vfmt, (ptype)valuep[i]); \
+	} \
+	return (1); \
+}
+
+NVLIST_ARRPRTFUNC(boolean_array, boolean_t, boolean_t, "%d")
+NVLIST_ARRPRTFUNC(byte_array, uchar_t, uchar_t, "0x%2.2x")
+NVLIST_ARRPRTFUNC(int8_array, int8_t, int8_t, "%d")
+NVLIST_ARRPRTFUNC(uint8_array, uint8_t, uint8_t, "0x%x")
+NVLIST_ARRPRTFUNC(int16_array, int16_t, int16_t, "%d")
+NVLIST_ARRPRTFUNC(uint16_array, uint16_t, uint16_t, "0x%x")
+NVLIST_ARRPRTFUNC(int32_array, int32_t, int32_t, "%d")
+NVLIST_ARRPRTFUNC(uint32_array, uint32_t, uint32_t, "0x%x")
+NVLIST_ARRPRTFUNC(int64_array, int64_t, longlong_t, "%lld")
+NVLIST_ARRPRTFUNC(uint64_array, uint64_t, u_longlong_t, "0x%llx")
+NVLIST_ARRPRTFUNC(string_array, char *, char *, "%s")
+
+/*
+ * Default renderer for an embedded nvlist member.
+ *
+ * Prints a "<name> = (embedded nvlist)" header, the nested list one
+ * indent increment deeper, and a closing "(end <name>)" line.  The header
+ * uses a fixed format string rather than pctl->nvprt_nmfmt.  The 'private'
+ * and 'nvl' arguments are not used.  Always returns 1.
+ */
+/*ARGSUSED*/
+static int
+nvprint_nvlist(nvlist_prtctl_t pctl, void *private,
+ nvlist_t *nvl, const char *name, nvlist_t *value)
+{
+ FILE *fp = pctl->nvprt_fp;
+
+ indent(pctl, 1);
+ (void) fprintf(fp, "%s = (embedded nvlist)\n", name);
+
+ /* Deepen the indent for the nested list, then restore it. */
+ pctl->nvprt_indent += pctl->nvprt_indentinc;
+ nvlist_print_with_indent(value, pctl);
+ pctl->nvprt_indent -= pctl->nvprt_indentinc;
+
+ indent(pctl, 1);
+ (void) fprintf(fp, "(end %s)\n", name);
+
+ return (1);
+}
+
+/*
+ * Default renderer for an array of embedded nvlists.
+ *
+ * Prints a "<name> = (array of embedded nvlists)" header and then, for
+ * each of the 'count' elements, a "(start <name>[i])" line, the nested
+ * list one indent increment deeper, and an "(end <name>[i])" line.
+ * The 'private' and 'nvl' arguments are not used.  Always returns 1.
+ */
+/*ARGSUSED*/
+static int
+nvaprint_nvlist_array(nvlist_prtctl_t pctl, void *private,
+    nvlist_t *nvl, const char *name, nvlist_t **valuep, uint_t count)
+{
+	FILE *fp = pctl->nvprt_fp;
+	uint_t i;
+
+	indent(pctl, 1);
+	(void) fprintf(fp, "%s = (array of embedded nvlists)\n", name);
+
+	for (i = 0; i < count; i++) {
+		indent(pctl, 1);
+		/* i is unsigned, so print it with %u */
+		(void) fprintf(fp, "(start %s[%u])\n", name, i);
+
+		/* Deepen the indent for the nested list, then restore it. */
+		pctl->nvprt_indent += pctl->nvprt_indentinc;
+		nvlist_print_with_indent(valuep[i], pctl);
+		pctl->nvprt_indent -= pctl->nvprt_indentinc;
+
+		indent(pctl, 1);
+		(void) fprintf(fp, "(end %s[%u])\n", name, i);
+	}
+
+	return (1);
+}
+
+/*
+ * ======================================================================
+ * | |
+ * | Interfaces that allow control over formatting. |
+ * | |
+ * ======================================================================
+ */
+
+/*
+ * Set the stdio stream that subsequent rendering through 'pctl' writes to.
+ */
+void
+nvlist_prtctl_setdest(nvlist_prtctl_t pctl, FILE *fp)
+{
+ pctl->nvprt_fp = fp;
+}
+
+/*
+ * Return the stdio stream currently used as the output destination.
+ */
+FILE *
+nvlist_prtctl_getdest(nvlist_prtctl_t pctl)
+{
+ return (pctl->nvprt_fp);
+}
+
+
+/*
+ * Configure indentation for 'pctl'.
+ *
+ * Out-of-range arguments are replaced rather than rejected: an unknown
+ * mode becomes NVLIST_INDENT_TABBED, a negative start becomes 0 and a
+ * negative increment becomes 1.
+ */
+void
+nvlist_prtctl_setindent(nvlist_prtctl_t pctl, enum nvlist_indent_mode mode,
+    int start, int inc)
+{
+	pctl->nvprt_indent_mode =
+	    (mode < NVLIST_INDENT_ABS || mode > NVLIST_INDENT_TABBED) ?
+	    NVLIST_INDENT_TABBED : mode;
+	pctl->nvprt_indent = (start < 0) ? 0 : start;
+	pctl->nvprt_indentinc = (inc < 0) ? 1 : inc;
+}
+
+/*
+ * Emit the current indentation to the output stream; 'onemore' is passed
+ * through to indent() as the number of extra levels to add.
+ */
+void
+nvlist_prtctl_doindent(nvlist_prtctl_t pctl, int onemore)
+{
+ indent(pctl, onemore);
+}
+
+
+/*
+ * Install the format string selected by 'which'.
+ *
+ * A NULL 'fmt' restores the built-in default for that slot: "%s = " for
+ * the member name, "\n" after a member and " " between array elements.
+ * For the between-array format the presence of a newline is recorded in
+ * nvprt_btwnarrfmt_nl.  Unknown selectors are ignored.
+ */
+void
+nvlist_prtctl_setfmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which,
+    const char *fmt)
+{
+	switch (which) {
+	case NVLIST_FMT_MEMBER_NAME:
+		pctl->nvprt_nmfmt = (fmt != NULL) ? fmt : "%s = ";
+		break;
+
+	case NVLIST_FMT_MEMBER_POSTAMBLE:
+		pctl->nvprt_eomfmt = (fmt != NULL) ? fmt : "\n";
+		break;
+
+	case NVLIST_FMT_BTWN_ARRAY:
+		/* The default separator has no newline, so the flag is 0. */
+		if (fmt == NULL)
+			fmt = " ";
+		pctl->nvprt_btwnarrfmt = fmt;
+		pctl->nvprt_btwnarrfmt_nl = (strstr(fmt, "\n") != NULL);
+		break;
+
+	default:
+		break;
+	}
+}
+
+
+/*
+ * Emit the format string selected by 'which' to the output stream.
+ *
+ * NVLIST_FMT_MEMBER_NAME consumes one additional 'char *' argument (the
+ * member name) and prints it through nvprt_nmfmt.  The other selectors
+ * take no extra arguments.  Unknown selectors print nothing.
+ */
+void
+nvlist_prtctl_dofmt(nvlist_prtctl_t pctl, enum nvlist_prtctl_fmt which, ...)
+{
+	FILE *fp = pctl->nvprt_fp;
+	va_list ap;
+	char *name;
+
+	va_start(ap, which);
+
+	switch (which) {
+	case NVLIST_FMT_MEMBER_NAME:
+		name = va_arg(ap, char *);
+		(void) fprintf(fp, pctl->nvprt_nmfmt, name);
+		break;
+
+	case NVLIST_FMT_MEMBER_POSTAMBLE:
+		(void) fprintf(fp, pctl->nvprt_eomfmt);
+		break;
+
+	case NVLIST_FMT_BTWN_ARRAY:
+		(void) fprintf(fp, pctl->nvprt_btwnarrfmt);
+		break;
+
+	default:
+		break;
+	}
+
+	va_end(ap);
+}
+
+/*
+ * ======================================================================
+ * | |
+ * | Interfaces to allow appointment of replacement rendering functions.|
+ * | |
+ * ======================================================================
+ */
+
+#define NVLIST_PRINTCTL_REPLACE(type, vtype) \
+void \
+nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \
+ int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype), \
+ void *private) \
+{ \
+ CUSTPRTOP(pctl, type) = func; \
+ CUSTPRTOPARG(pctl, type) = private; \
+}
+
+NVLIST_PRINTCTL_REPLACE(boolean, int)
+NVLIST_PRINTCTL_REPLACE(boolean_value, boolean_t)
+NVLIST_PRINTCTL_REPLACE(byte, uchar_t)
+NVLIST_PRINTCTL_REPLACE(int8, int8_t)
+NVLIST_PRINTCTL_REPLACE(uint8, uint8_t)
+NVLIST_PRINTCTL_REPLACE(int16, int16_t)
+NVLIST_PRINTCTL_REPLACE(uint16, uint16_t)
+NVLIST_PRINTCTL_REPLACE(int32, int32_t)
+NVLIST_PRINTCTL_REPLACE(uint32, uint32_t)
+NVLIST_PRINTCTL_REPLACE(int64, int64_t)
+NVLIST_PRINTCTL_REPLACE(uint64, uint64_t)
+NVLIST_PRINTCTL_REPLACE(double, double)
+NVLIST_PRINTCTL_REPLACE(string, char *)
+NVLIST_PRINTCTL_REPLACE(hrtime, hrtime_t)
+NVLIST_PRINTCTL_REPLACE(nvlist, nvlist_t *)
+
+#define NVLIST_PRINTCTL_AREPLACE(type, vtype) \
+void \
+nvlist_prtctlop_##type(nvlist_prtctl_t pctl, \
+ int (*func)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, \
+ uint_t), void *private) \
+{ \
+ CUSTPRTOP(pctl, type) = func; \
+ CUSTPRTOPARG(pctl, type) = private; \
+}
+
+NVLIST_PRINTCTL_AREPLACE(boolean_array, boolean_t *)
+NVLIST_PRINTCTL_AREPLACE(byte_array, uchar_t *)
+NVLIST_PRINTCTL_AREPLACE(int8_array, int8_t *)
+NVLIST_PRINTCTL_AREPLACE(uint8_array, uint8_t *)
+NVLIST_PRINTCTL_AREPLACE(int16_array, int16_t *)
+NVLIST_PRINTCTL_AREPLACE(uint16_array, uint16_t *)
+NVLIST_PRINTCTL_AREPLACE(int32_array, int32_t *)
+NVLIST_PRINTCTL_AREPLACE(uint32_array, uint32_t *)
+NVLIST_PRINTCTL_AREPLACE(int64_array, int64_t *)
+NVLIST_PRINTCTL_AREPLACE(uint64_array, uint64_t *)
+NVLIST_PRINTCTL_AREPLACE(string_array, char **)
+NVLIST_PRINTCTL_AREPLACE(nvlist_array, nvlist_t **)
+
+/*
+ * ======================================================================
+ * | |
+ * | Interfaces to manage nvlist_prtctl_t cookies. |
+ * | |
+ * ======================================================================
*/
-static
+
+
+/*
+ * Table of default renderers, each with a NULL private argument.
+ * The initializers are positional, so their order must stay in step with
+ * the member order of struct nvlist_printops.
+ */
+static const struct nvlist_printops defprtops = {
+ { nvprint_boolean, NULL },
+ { nvprint_boolean_value, NULL },
+ { nvprint_byte, NULL },
+ { nvprint_int8, NULL },
+ { nvprint_uint8, NULL },
+ { nvprint_int16, NULL },
+ { nvprint_uint16, NULL },
+ { nvprint_int32, NULL },
+ { nvprint_uint32, NULL },
+ { nvprint_int64, NULL },
+ { nvprint_uint64, NULL },
+ { nvprint_double, NULL },
+ { nvprint_string, NULL },
+ { nvprint_hrtime, NULL },
+ { nvprint_nvlist, NULL },
+ { nvaprint_boolean_array, NULL },
+ { nvaprint_byte_array, NULL },
+ { nvaprint_int8_array, NULL },
+ { nvaprint_uint8_array, NULL },
+ { nvaprint_int16_array, NULL },
+ { nvaprint_uint16_array, NULL },
+ { nvaprint_int32_array, NULL },
+ { nvaprint_uint32_array, NULL },
+ { nvaprint_int64_array, NULL },
+ { nvaprint_uint64_array, NULL },
+ { nvaprint_string_array, NULL },
+ { nvaprint_nvlist_array, NULL },
+};
+
+/*
+ * Initialise *pctl with the stock settings: output to 'fp', tabbed
+ * indentation starting at depth 0 with an increment of 1, the default
+ * format strings, the built-in renderer table and 'ops' (which may be
+ * NULL) as the custom renderer table.
+ */
+static void
+prtctl_defaults(FILE *fp, struct nvlist_prtctl *pctl,
+    struct nvlist_printops *ops)
+{
+	/* Renderer tables: caller overrides and the shared defaults. */
+	pctl->nvprt_custops = ops;
+	pctl->nvprt_dfltops = (struct nvlist_printops *)&defprtops;
+
+	/* Format strings. */
+	pctl->nvprt_btwnarrfmt_nl = 0;
+	pctl->nvprt_btwnarrfmt = " ";
+	pctl->nvprt_eomfmt = "\n";
+	pctl->nvprt_nmfmt = "%s = ";
+
+	/* Indentation. */
+	pctl->nvprt_indentinc = 1;
+	pctl->nvprt_indent = 0;
+	pctl->nvprt_indent_mode = NVLIST_INDENT_TABBED;
+
+	/* Destination stream. */
+	pctl->nvprt_fp = fp;
+}
+
+/*
+ * Allocate a print-control cookie set up with default formatting, output
+ * to stdout and an empty (zero-filled) custom renderer table.
+ *
+ * Returns NULL if either allocation fails; nothing is leaked in that case.
+ */
+nvlist_prtctl_t
+nvlist_prtctl_alloc(void)
+{
+	struct nvlist_prtctl *ctl = malloc(sizeof (*ctl));
+	struct nvlist_printops *custom;
+
+	if (ctl == NULL)
+		return (NULL);
+
+	custom = calloc(1, sizeof (*custom));
+	if (custom == NULL) {
+		free(ctl);
+		return (NULL);
+	}
+
+	prtctl_defaults(stdout, ctl, custom);
+	return (ctl);
+}
+
void
-nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
+nvlist_prtctl_free(nvlist_prtctl_t pctl)
+{
+ if (pctl != NULL) {
+ free(pctl->nvprt_custops);
+ free(pctl);
+ }
+}
+
+/*
+ * ======================================================================
+ * | |
+ * | Top-level print request interfaces. |
+ * | |
+ * ======================================================================
+ */
+
+/*
+ * nvlist_print - Prints elements in an event buffer
+ */
+static void
+nvlist_print_with_indent(nvlist_t *nvl, nvlist_prtctl_t pctl)
{
- int i;
+ FILE *fp = pctl->nvprt_fp;
char *name;
uint_t nelem;
nvpair_t *nvp;
@@ -60,7 +570,7 @@ nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
if (nvl == NULL)
return;
- indent(fp, depth);
+ indent(pctl, 0);
(void) fprintf(fp, "nvlist version: %d\n", NVL_VERSION(nvl));
nvp = nvlist_next_nvpair(nvl, NULL);
@@ -68,199 +578,174 @@ nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
while (nvp) {
data_type_t type = nvpair_type(nvp);
- indent(fp, depth);
name = nvpair_name(nvp);
- (void) fprintf(fp, "\t%s =", name);
nelem = 0;
+
switch (type) {
case DATA_TYPE_BOOLEAN: {
- (void) fprintf(fp, " 1");
+ RENDER(pctl, boolean, nvl, name, 1);
break;
}
case DATA_TYPE_BOOLEAN_VALUE: {
boolean_t val;
(void) nvpair_value_boolean_value(nvp, &val);
- (void) fprintf(fp, " %d", val);
+ RENDER(pctl, boolean_value, nvl, name, val);
break;
}
case DATA_TYPE_BYTE: {
uchar_t val;
(void) nvpair_value_byte(nvp, &val);
- (void) fprintf(fp, " 0x%2.2x", val);
+ RENDER(pctl, byte, nvl, name, val);
break;
}
case DATA_TYPE_INT8: {
int8_t val;
(void) nvpair_value_int8(nvp, &val);
- (void) fprintf(fp, " %d", val);
+ RENDER(pctl, int8, nvl, name, val);
break;
}
case DATA_TYPE_UINT8: {
uint8_t val;
(void) nvpair_value_uint8(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
+ RENDER(pctl, uint8, nvl, name, val);
break;
}
case DATA_TYPE_INT16: {
int16_t val;
(void) nvpair_value_int16(nvp, &val);
- (void) fprintf(fp, " %d", val);
+ RENDER(pctl, int16, nvl, name, val);
break;
}
case DATA_TYPE_UINT16: {
uint16_t val;
(void) nvpair_value_uint16(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
+ RENDER(pctl, uint16, nvl, name, val);
break;
}
case DATA_TYPE_INT32: {
int32_t val;
(void) nvpair_value_int32(nvp, &val);
- (void) fprintf(fp, " %d", val);
+ RENDER(pctl, int32, nvl, name, val);
break;
}
case DATA_TYPE_UINT32: {
uint32_t val;
(void) nvpair_value_uint32(nvp, &val);
- (void) fprintf(fp, " 0x%x", val);
+ RENDER(pctl, uint32, nvl, name, val);
break;
}
case DATA_TYPE_INT64: {
int64_t val;
(void) nvpair_value_int64(nvp, &val);
- (void) fprintf(fp, " %lld", (longlong_t)val);
+ RENDER(pctl, int64, nvl, name, val);
break;
}
case DATA_TYPE_UINT64: {
uint64_t val;
(void) nvpair_value_uint64(nvp, &val);
- (void) fprintf(fp, " 0x%llx", (u_longlong_t)val);
+ RENDER(pctl, uint64, nvl, name, val);
break;
}
case DATA_TYPE_DOUBLE: {
double val;
(void) nvpair_value_double(nvp, &val);
- (void) fprintf(fp, " 0x%llf", val);
+ RENDER(pctl, double, nvl, name, val);
break;
}
case DATA_TYPE_STRING: {
char *val;
(void) nvpair_value_string(nvp, &val);
- (void) fprintf(fp, " %s", val);
+ RENDER(pctl, string, nvl, name, val);
break;
}
case DATA_TYPE_BOOLEAN_ARRAY: {
boolean_t *val;
(void) nvpair_value_boolean_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
+ ARENDER(pctl, boolean_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_BYTE_ARRAY: {
uchar_t *val;
(void) nvpair_value_byte_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%2.2x", val[i]);
+ ARENDER(pctl, byte_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_INT8_ARRAY: {
int8_t *val;
(void) nvpair_value_int8_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
+ ARENDER(pctl, int8_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_UINT8_ARRAY: {
uint8_t *val;
(void) nvpair_value_uint8_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
+ ARENDER(pctl, uint8_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_INT16_ARRAY: {
int16_t *val;
(void) nvpair_value_int16_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
+ ARENDER(pctl, int16_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_UINT16_ARRAY: {
uint16_t *val;
(void) nvpair_value_uint16_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
+ ARENDER(pctl, uint16_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_INT32_ARRAY: {
int32_t *val;
(void) nvpair_value_int32_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %d", val[i]);
+ ARENDER(pctl, int32_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_UINT32_ARRAY: {
uint32_t *val;
(void) nvpair_value_uint32_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%x", val[i]);
+ ARENDER(pctl, uint32_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_INT64_ARRAY: {
int64_t *val;
(void) nvpair_value_int64_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %lld", (longlong_t)val[i]);
+ ARENDER(pctl, int64_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_UINT64_ARRAY: {
uint64_t *val;
(void) nvpair_value_uint64_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " 0x%llx",
- (u_longlong_t)val[i]);
+ ARENDER(pctl, uint64_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_STRING_ARRAY: {
char **val;
(void) nvpair_value_string_array(nvp, &val, &nelem);
- for (i = 0; i < nelem; i++)
- (void) fprintf(fp, " %s", val[i]);
+ ARENDER(pctl, string_array, nvl, name, val, nelem);
break;
}
case DATA_TYPE_HRTIME: {
hrtime_t val;
(void) nvpair_value_hrtime(nvp, &val);
- (void) fprintf(fp, " 0x%llx", val);
+ RENDER(pctl, hrtime, nvl, name, val);
break;
}
case DATA_TYPE_NVLIST: {
nvlist_t *val;
(void) nvpair_value_nvlist(nvp, &val);
- (void) fprintf(fp, " (embedded nvlist)\n");
- nvlist_print_with_indent(fp, val, depth + 1);
- indent(fp, depth + 1);
- (void) fprintf(fp, "(end %s)\n", name);
+ RENDER(pctl, nvlist, nvl, name, val);
break;
}
case DATA_TYPE_NVLIST_ARRAY: {
nvlist_t **val;
(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
- (void) fprintf(fp, " (array of embedded nvlists)\n");
- for (i = 0; i < nelem; i++) {
- indent(fp, depth + 1);
- (void) fprintf(fp,
- "(start %s[%d])\n", name, i);
- nvlist_print_with_indent(fp, val[i], depth + 1);
- indent(fp, depth + 1);
- (void) fprintf(fp, "(end %s[%d])\n", name, i);
- }
+ ARENDER(pctl, nvlist_array, nvl, name, val, nelem);
break;
}
default:
(void) fprintf(fp, " unknown data type (%d)", type);
break;
}
- (void) fprintf(fp, "\n");
nvp = nvlist_next_nvpair(nvl, nvp);
}
}
@@ -268,9 +753,175 @@ nvlist_print_with_indent(FILE *fp, nvlist_t *nvl, int depth)
void
nvlist_print(FILE *fp, nvlist_t *nvl)
{
- nvlist_print_with_indent(fp, nvl, 0);
+ struct nvlist_prtctl pc;
+
+ prtctl_defaults(fp, &pc, NULL);
+ nvlist_print_with_indent(nvl, &pc);
+}
+
+/*
+ * Print 'nvl' using the formatting and rendering settings held in 'pctl'.
+ */
+void
+nvlist_prt(nvlist_t *nvl, nvlist_prtctl_t pctl)
+{
+ nvlist_print_with_indent(nvl, pctl);
+}
+
+#define NVP(elem, type, vtype, ptype, format) { \
+ vtype value; \
+\
+ (void) nvpair_value_##type(elem, &value); \
+ (void) printf("%*s%s: " format "\n", indent, "", \
+ nvpair_name(elem), (ptype)value); \
}
+/*
+ * Print every element of an array-valued pair as "<name>[i]: <value>",
+ * one per line.  'type' selects the nvpair_value_<type>() accessor,
+ * 'vtype' is the element type, and each element is cast to 'ptype' and
+ * printed with 'format'.  Relies on a variable named 'indent' being in
+ * scope at the point of use.  The index is unsigned, hence "%u".
+ */
+#define NVPA(elem, type, vtype, ptype, format) { \
+	uint_t i, count; \
+	vtype *value; \
+\
+	(void) nvpair_value_##type(elem, &value, &count); \
+	for (i = 0; i < count; i++) { \
+		(void) printf("%*s%s[%u]: " format "\n", indent, "", \
+		    nvpair_name(elem), i, (ptype)value[i]); \
+	} \
+}
+
+/*
+ * Similar to nvlist_print() but handles arrays slightly differently.
+ *
+ * Writes every pair of 'list' to stdout, one "<name>: <value>" line per
+ * scalar and one "<name>[i]: <value>" line per array element, each
+ * prefixed by 'indent' spaces.  Embedded nvlists are dumped recursively
+ * with the indent increased by 4.  A NULL list prints nothing.
+ *
+ * Valueless DATA_TYPE_BOOLEAN pairs are printed as just their name.
+ * Types without a case below are reported as "bad config type".
+ */
+void
+dump_nvlist(nvlist_t *list, int indent)
+{
+	nvpair_t *elem = NULL;
+	boolean_t bool_value;
+	nvlist_t *nvlist_value;
+	nvlist_t **nvlist_array_value;
+	uint_t i, count;
+
+	if (list == NULL) {
+		return;
+	}
+
+	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
+		switch (nvpair_type(elem)) {
+		case DATA_TYPE_BOOLEAN:
+			/* A flag with no value: its presence is the data. */
+			(void) printf("%*s%s\n", indent, "",
+			    nvpair_name(elem));
+			break;
+
+		case DATA_TYPE_BOOLEAN_VALUE:
+			(void) nvpair_value_boolean_value(elem, &bool_value);
+			(void) printf("%*s%s: %s\n", indent, "",
+			    nvpair_name(elem), bool_value ? "true" : "false");
+			break;
+
+		case DATA_TYPE_BYTE:
+			NVP(elem, byte, uchar_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT8:
+			NVP(elem, int8, int8_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT8:
+			NVP(elem, uint8, uint8_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT16:
+			NVP(elem, int16, int16_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT16:
+			NVP(elem, uint16, uint16_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT32:
+			NVP(elem, int32, int32_t, long, "%ld");
+			break;
+
+		case DATA_TYPE_UINT32:
+			NVP(elem, uint32, uint32_t, ulong_t, "%lu");
+			break;
+
+		case DATA_TYPE_INT64:
+			NVP(elem, int64, int64_t, longlong_t, "%lld");
+			break;
+
+		case DATA_TYPE_UINT64:
+			NVP(elem, uint64, uint64_t, u_longlong_t, "%llu");
+			break;
+
+		case DATA_TYPE_STRING:
+			NVP(elem, string, char *, char *, "'%s'");
+			break;
+
+		case DATA_TYPE_BYTE_ARRAY:
+			NVPA(elem, byte_array, uchar_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT8_ARRAY:
+			NVPA(elem, int8_array, int8_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT8_ARRAY:
+			NVPA(elem, uint8_array, uint8_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT16_ARRAY:
+			NVPA(elem, int16_array, int16_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT16_ARRAY:
+			NVPA(elem, uint16_array, uint16_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT32_ARRAY:
+			NVPA(elem, int32_array, int32_t, long, "%ld");
+			break;
+
+		case DATA_TYPE_UINT32_ARRAY:
+			NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu");
+			break;
+
+		case DATA_TYPE_INT64_ARRAY:
+			NVPA(elem, int64_array, int64_t, longlong_t, "%lld");
+			break;
+
+		case DATA_TYPE_UINT64_ARRAY:
+			NVPA(elem, uint64_array, uint64_t, u_longlong_t,
+			    "%llu");
+			break;
+
+		case DATA_TYPE_STRING_ARRAY:
+			NVPA(elem, string_array, char *, char *, "'%s'");
+			break;
+
+		case DATA_TYPE_NVLIST:
+			(void) nvpair_value_nvlist(elem, &nvlist_value);
+			(void) printf("%*s%s:\n", indent, "",
+			    nvpair_name(elem));
+			dump_nvlist(nvlist_value, indent + 4);
+			break;
+
+		case DATA_TYPE_NVLIST_ARRAY:
+			(void) nvpair_value_nvlist_array(elem,
+			    &nvlist_array_value, &count);
+			for (i = 0; i < count; i++) {
+				(void) printf("%*s%s[%u]:\n", indent, "",
+				    nvpair_name(elem), i);
+				dump_nvlist(nvlist_array_value[i], indent + 4);
+			}
+			break;
+
+		default:
+			(void) printf(dgettext(TEXT_DOMAIN, "bad config type "
+			    "%d for %s\n"), nvpair_type(elem),
+			    nvpair_name(elem));
+		}
+	}
+}
+
+/*
+ * ======================================================================
+ * | |
+ * | Misc private interface. |
+ * | |
+ * ======================================================================
+ */
+
/*
* Determine if string 'value' matches 'nvp' value. The 'value' string is
* converted, depending on the type of 'nvp', prior to match. For numeric
diff --git a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
index e655e0d..4c2615d 100644
--- a/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
+++ b/cddl/contrib/opensolaris/lib/libnvpair/libnvpair.h
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LIBNVPAIR_H
#define _LIBNVPAIR_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/nvpair.h>
#include <stdlib.h>
#include <stdio.h>
@@ -37,9 +34,158 @@
extern "C" {
#endif
-void nvlist_print(FILE *, nvlist_t *);
-int nvpair_value_match(nvpair_t *, int, char *, char **);
-int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **);
+/*
+ * All interfaces described in this file are private to Solaris, and
+ * are subject to change at any time and without notice. The public
+ * nvlist/nvpair interfaces, as documented in manpage sections 3NVPAIR,
+ * are all imported from <sys/nvpair.h> included above.
+ */
+
+extern int nvpair_value_match(nvpair_t *, int, char *, char **);
+extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *,
+ char **);
+
+extern void nvlist_print(FILE *, nvlist_t *);
+extern void dump_nvlist(nvlist_t *, int);
+
+/*
+ * Private nvlist printing interface that allows the caller some control
+ * over output rendering (as opposed to nvlist_print and dump_nvlist).
+ *
+ * Obtain an opaque nvlist_prtctl_t cookie using nvlist_prtctl_alloc
+ * (NULL on failure); on return the cookie is set up for default formatting
+ * and rendering. Quote the cookie in subsequent customisation functions and
+ * then pass the cookie to nvlist_prt to render the nvlist. Finally,
+ * use nvlist_prtctl_free to release the cookie.
+ *
+ * For all nvlist_lookup_xxx and nvlist_lookup_xxx_array functions
+ * we have a corresponding pair of functions that appoint replacement
+ * rendering functions:
+ *
+ *	extern void nvlist_prtctlop_xxx(nvlist_prtctl_t,
+ *	    int (*)(nvlist_prtctl_t ctl, void *private, nvlist_t *nvl,
+ *	    const char *name, xxxtype value), void *private)
+ *
+ *	and
+ *
+ *	extern void nvlist_prtctlop_xxx_array(nvlist_prtctl_t,
+ *	    int (*)(nvlist_prtctl_t ctl, void *private, nvlist_t *nvl,
+ *	    const char *name, xxxtype *value, uint_t count), void *private)
+ *
+ * where xxxtype is the C datatype corresponding to xxx, eg int8_t for "int8"
+ * and char * for "string". The function that is appointed to render the
+ * specified datatype receives as arguments the cookie, the private argument
+ * registered along with it, the nvlist being rendered, the nvlist
+ * member name, the value of that member (or a pointer for array function),
+ * and (for array rendering functions) a count of the number of elements.
+ */
+
+typedef struct nvlist_prtctl *nvlist_prtctl_t; /* opaque */
+
+enum nvlist_indent_mode {
+ NVLIST_INDENT_ABS, /* Absolute indentation */
+ NVLIST_INDENT_TABBED /* Indent with tabstops */
+};
+
+extern nvlist_prtctl_t nvlist_prtctl_alloc(void);
+extern void nvlist_prtctl_free(nvlist_prtctl_t);
+extern void nvlist_prt(nvlist_t *, nvlist_prtctl_t);
+
+/* Output stream */
+extern void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *);
+extern FILE *nvlist_prtctl_getdest(nvlist_prtctl_t);
+
+/* Indentation mode, start indent, indent increment; default tabbed/0/1 */
+extern void nvlist_prtctl_setindent(nvlist_prtctl_t, enum nvlist_indent_mode,
+ int, int);
+extern void nvlist_prtctl_doindent(nvlist_prtctl_t, int);
+
+enum nvlist_prtctl_fmt {
+ NVLIST_FMT_MEMBER_NAME, /* name fmt; default "%s = " */
+ NVLIST_FMT_MEMBER_POSTAMBLE, /* after nvlist member; default "\n" */
+ NVLIST_FMT_BTWN_ARRAY /* between array members; default " " */
+};
+
+extern void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt,
+ const char *);
+extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...);
+
+/*
+ * Function prototypes for interfaces that appoint a new rendering function
+ * for single-valued nvlist members.
+ *
+ * A replacement function receives arguments as follows:
+ *
+ * nvlist_prtctl_t Print control structure; do not change preferences
+ * for this object from a print callback function.
+ *
+ * void * The function-private cookie argument registered
+ * when the replacement function was appointed.
+ *
+ * nvlist_t * The full nvlist that is being processed. The
+ * rendering function is called to render a single
+ * member (name and value passed as below) but it may
+ * want to reference or incorporate other aspects of
+ * the full nvlist.
+ *
+ * const char * Member name to render
+ *
+ * valtype Value of the member to render
+ *
+ * The function must return non-zero if it has rendered output for this
+ * member, or 0 if it wants to default to standard rendering for this
+ * one member.
+ */
+
+#define NVLIST_PRINTCTL_SVDECL(funcname, valtype) \
+ extern void funcname(nvlist_prtctl_t, \
+ int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, valtype), \
+ void *)
+
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean, int);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean_value, boolean_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_byte, uchar_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int8, int8_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint8, uint8_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int16, int16_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint16, uint16_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int32, int32_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint32, uint32_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int64, int64_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint64, uint64_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_double, double);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_string, char *);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_hrtime, hrtime_t);
+NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_nvlist, nvlist_t *);
+
+#undef NVLIST_PRINTCTL_SVDECL /* was just for "clarity" above */
+
+/*
+ * Function prototypes for interfaces that appoint a new rendering function
+ * for array-valued nvlist members.
+ *
+ * One additional argument is taken: uint_t for the number of array elements
+ *
+ * Return values as above.
+ */
+#define NVLIST_PRINTCTL_AVDECL(funcname, vtype) \
+ extern void funcname(nvlist_prtctl_t, \
+ int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, uint_t), \
+ void *)
+
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_boolean_array, boolean_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_byte_array, uchar_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int8_array, int8_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint8_array, uint8_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int16_array, int16_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint16_array, uint16_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int32_array, int32_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint32_array, uint32_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int64_array, int64_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint64_array, uint64_t *);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_string_array, char **);
+NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_nvlist_array, nvlist_t **);
+
+#undef NVLIST_PRINTCTL_AVDECL /* was just for "clarity" above */
#ifdef __cplusplus
}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
index 269687e..7a5f8a8 100644
--- a/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/libuutil.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LIBUUTIL_H
@@ -29,6 +28,7 @@
#include <solaris.h>
#include <sys/types.h>
#include <stdarg.h>
+#include <stdio.h>
#ifdef __cplusplus
extern "C" {
@@ -143,12 +143,21 @@ extern int uu_open_tmp(const char *dir, uint_t uflags);
/*
* Convenience functions.
*/
+/* Number of elements in the array 'a'; 'a' must be an array, not a pointer. */
+#define	UU_NELEM(a)		(sizeof (a) / sizeof (*(a)))
+
/*PRINTFLIKE1*/
extern char *uu_msprintf(const char *format, ...);
extern void *uu_zalloc(size_t);
extern char *uu_strdup(const char *);
extern void uu_free(void *);
+extern boolean_t uu_strcaseeq(const char *a, const char *b);
+extern boolean_t uu_streq(const char *a, const char *b);
+extern char *uu_strndup(const char *s, size_t n);
+extern boolean_t uu_strbw(const char *a, const char *b);
+extern void *uu_memdup(const void *buf, size_t sz);
+extern void uu_dump(FILE *out, const char *prefix, const void *buf, size_t len);
+
/*
* Comparison function type definition.
* Developers should be careful in their use of the _private argument. If you
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
index 05d8622..2bef759 100644
--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_alloc.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include "libuutil_common.h"
@@ -67,6 +66,44 @@ uu_strdup(const char *str)
return (buf);
}
+/*
+ * Return a newly allocated, NUL-terminated copy of at most the first n
+ * bytes of s (fewer if s ends sooner), or NULL if uu_zalloc() fails.
+ */
+char *
+uu_strndup(const char *s, size_t n)
+{
+	size_t copylen = strnlen(s, n);
+	char *dup = uu_zalloc(copylen + 1);
+
+	if (dup != NULL) {
+		/* Copy only what strnlen() measured, then terminate. */
+		if (copylen > 0)
+			(void) memcpy(dup, s, copylen);
+		dup[copylen] = '\0';
+	}
+
+	return (dup);
+}
+
+/*
+ * Duplicate a block of memory.  Combines allocation with memcpy, much as
+ * strdup combines malloc, strlen, and strcpy.  Returns the new sz-byte
+ * copy of buf, or NULL if uu_zalloc() fails.
+ */
+void *
+uu_memdup(const void *buf, size_t sz)
+{
+	void *p;
+
+	p = uu_zalloc(sz);
+	if (p == NULL)
+		return (NULL);
+
+	/*
+	 * Skip the copy for an empty block, as uu_strndup() does: memcpy
+	 * requires valid pointers even for a zero length, and a caller
+	 * duplicating nothing may well pass buf == NULL.
+	 */
+	if (sz > 0)
+		(void) memcpy(p, buf, sz);
+	return (p);
+}
+
char *
uu_msprintf(const char *format, ...)
{
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c
index fb0c32b..507d4eb 100644
--- a/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_misc.c
@@ -20,12 +20,9 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "libuutil_common.h"
#include <assert.h>
@@ -39,6 +36,7 @@
#include <sys/debug.h>
#include <thread.h>
#include <unistd.h>
+#include <ctype.h>
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
@@ -248,3 +246,30 @@ uu_init(void)
{
(void) pthread_atfork(uu_lockup, uu_release, uu_release_child);
}
+
+/*
+ * Dump a block of memory in hex+ascii, for debugging.
+ *
+ * Each output line starts with 'prefix' and covers up to 16 bytes of 'buf':
+ * first the bytes as two-digit hex values, then the same bytes as
+ * characters, with '.' standing in for anything isprint() rejects.
+ */
+void
+uu_dump(FILE *out, const char *prefix, const void *buf, size_t len)
+{
+	const unsigned char *p = buf;
+	size_t i = 0;
+
+	/* Index with size_t: 'len' is a size_t and may exceed INT_MAX. */
+	while (i < len) {
+		/* Bytes on this row: 16, or whatever is left on the last row. */
+		size_t n = (len - i < 16) ? len - i : 16;
+		size_t j;
+
+		(void) fprintf(out, "%s", prefix);
+		for (j = 0; j < n; j++) {
+			(void) fprintf(out, "%2.2x ", p[i + j]);
+		}
+		/*
+		 * Pad a short last row by one full hex cell (two digits plus
+		 * a space) per missing byte so the ascii column lines up.
+		 */
+		for (; j < 16; j++) {
+			(void) fprintf(out, "   ");
+		}
+		for (j = 0; j < n; j++) {
+			(void) fprintf(out, "%c",
+			    isprint(p[i + j]) ? p[i + j] : '.');
+		}
+		(void) fprintf(out, "\n");
+
+		i += n;
+	}
+}
diff --git a/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c b/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c
new file mode 100644
index 0000000..66afba0
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libuutil/common/uu_string.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * String helper functions
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <ctype.h>
+#include "libuutil.h"
+
+/* Return true when strcmp() finds strings a and b identical */
+boolean_t
+uu_streq(const char *a, const char *b)
+{
+	int diff = strcmp(a, b);
+
+	return (diff == 0);
+}
+
+/* Return true when strcasecmp() finds a and b equal, ignoring case */
+boolean_t
+uu_strcaseeq(const char *a, const char *b)
+{
+	int diff = strcasecmp(a, b);
+
+	return (diff == 0);
+}
+
+/* Return true when b is a prefix of a, i.e. string a Begins With string b */
+boolean_t
+uu_strbw(const char *a, const char *b)
+{
+	size_t prefix_len = strlen(b);
+
+	return (strncmp(a, b, prefix_len) == 0);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
index 5fad609..fff63dd 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
@@ -20,8 +20,8 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _LIBZFS_H
@@ -66,7 +66,6 @@ enum {
EZFS_BADSTREAM, /* bad backup stream */
EZFS_DSREADONLY, /* dataset is readonly */
EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
- EZFS_VOLHASDATA, /* volume already contains data */
EZFS_INVALIDNAME, /* invalid dataset name */
EZFS_BADRESTORE, /* unable to restore to destination */
EZFS_BADBACKUP, /* backup failed */
@@ -85,17 +84,15 @@ enum {
EZFS_UMOUNTFAILED, /* failed to unmount dataset */
EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */
EZFS_SHARENFSFAILED, /* share(1M) failed */
- EZFS_DEVLINKS, /* failed to create zvol links */
EZFS_PERM, /* permission denied */
EZFS_NOSPC, /* out of space */
+ EZFS_FAULT, /* bad address */
EZFS_IO, /* I/O error */
EZFS_INTR, /* signal received */
EZFS_ISSPARE, /* device is a hot spare */
EZFS_INVALCONFIG, /* invalid vdev configuration */
EZFS_RECURSIVE, /* recursive dependency */
EZFS_NOHISTORY, /* no history object */
- EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
- EZFS_SHAREISCSIFAILED, /* iscsitgtd failed request to share */
EZFS_POOLPROPS, /* couldn't retrieve pool props */
EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */
EZFS_POOL_INVALARG, /* invalid argument for this pool operation */
@@ -103,12 +100,10 @@ enum {
EZFS_OPENFAILED, /* open of device failed */
EZFS_NOCAP, /* couldn't get capacity */
EZFS_LABELFAILED, /* write of label failed */
- EZFS_ISCSISVCUNAVAIL, /* iscsi service unavailable */
EZFS_BADWHO, /* invalid permission who */
EZFS_BADPERM, /* invalid permission */
EZFS_BADPERMSET, /* invalid permission set name */
EZFS_NODELEGATION, /* delegated administration is disabled */
- EZFS_PERMRDONLY, /* pemissions are readonly */
EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */
EZFS_SHARESMBFAILED, /* failed to share over smb */
EZFS_BADCACHE, /* bad cache file */
@@ -117,6 +112,17 @@ enum {
EZFS_NOTSUP, /* ops not supported on this dataset */
EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
+ EZFS_REFTAG_RELE, /* snapshot release: tag not found */
+ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */
+ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */
+ EZFS_PIPEFAILED, /* pipe create failed */
+ EZFS_THREADCREATEFAILED, /* thread create failed */
+ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
+ EZFS_SCRUBBING, /* currently scrubbing */
+ EZFS_NO_SCRUB, /* no active scrub */
+ EZFS_DIFF, /* general failure of zfs diff */
+ EZFS_DIFFDATA, /* bad zfs diff data */
+ EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_UNKNOWN
};
@@ -211,11 +217,19 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
extern int zpool_destroy(zpool_handle_t *);
extern int zpool_add(zpool_handle_t *, nvlist_t *);
+/*
+ * Options passed by value to zpool_vdev_split().  The flags are unsigned so
+ * that a set flag reads back as 1: whether a plain 'int' bit-field is signed
+ * is implementation-defined, and a signed 1-bit field holds only 0 and -1.
+ */
+typedef struct splitflags {
+	/* do not split, but return the config that would be split off */
+	unsigned int dryrun : 1;
+
+	/* after splitting, import the pool */
+	unsigned int import : 1;
+} splitflags_t;
+
/*
* Functions to manipulate pool and vdev state
*/
-extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
-extern int zpool_clear(zpool_handle_t *, const char *);
+extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
+extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
@@ -224,13 +238,17 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
const char *, nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
+ splitflags_t);
-extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
-extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
+extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
+extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
boolean_t *, boolean_t *);
+extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
+ boolean_t *, boolean_t *, boolean_t *);
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
/*
@@ -284,6 +302,7 @@ typedef enum {
ZPOOL_STATUS_VERSION_OLDER, /* older on-disk version */
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
ZPOOL_STATUS_OFFLINE_DEV, /* device online */
+ ZPOOL_STATUS_REMOVED_DEV, /* removed device */
/*
* Finally, the following indicates a healthy pool.
@@ -293,6 +312,7 @@ typedef enum {
extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
/*
* Statistics and configuration functions.
@@ -309,35 +329,53 @@ extern int zpool_export_force(zpool_handle_t *);
extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
char *altroot);
extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
- nvlist_t *, boolean_t);
+ nvlist_t *, int);
/*
* Search for pools to import
*/
+
+/*
+ * Search criteria handed to zpool_search_import().  The one-bit flags are
+ * unsigned so that a set flag reads back as 1: whether a plain 'int'
+ * bit-field is signed is implementation-defined, and a signed 1-bit field
+ * holds only 0 and -1.
+ */
+typedef struct importargs {
+	char **path;		/* a list of paths to search		*/
+	int paths;		/* number of paths to search		*/
+	char *poolname;		/* name of a pool to find		*/
+	uint64_t guid;		/* guid of a pool to find		*/
+	char *cachefile;	/* cachefile to use for import		*/
+	unsigned int can_be_active : 1; /* can the pool be active?	*/
+	unsigned int unique : 1; /* does 'poolname' already exist?	*/
+	unsigned int exists : 1; /* set on return if pool already exists */
+} importargs_t;
+
+extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
+
+/* legacy pool search routines */
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
char *, uint64_t);
-extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
- char *);
-extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
- uint64_t);
-extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
/*
* Miscellaneous pool functions
*/
struct zfs_cmd;
-extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern const char *zfs_history_event_names[LOG_END];
+
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
+ boolean_t verbose);
extern int zpool_upgrade(zpool_handle_t *, uint64_t);
extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
+extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
+ nvlist_t ***, uint_t *);
extern void zpool_set_history_str(const char *subcommand, int argc,
char **argv, char *history_str);
extern int zpool_stage_history(libzfs_handle_t *, const char *);
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, unsigned long, struct zfs_cmd *);
-extern int zpool_get_physpath(zpool_handle_t *, char *);
+extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
+extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
+ nvlist_t *);
+
/*
* Basic handle manipulations. These functions do not create or destroy the
* underlying datasets, only the references to them.
@@ -368,6 +406,8 @@ extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
zprop_source_t *, char *, size_t, boolean_t);
+extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
+ boolean_t);
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
zprop_source_t *, char *, size_t);
extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
@@ -375,10 +415,11 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
char *propbuf, int proplen, boolean_t literal);
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-extern int zfs_prop_inherit(zfs_handle_t *, const char *);
+extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
extern const char *zfs_prop_values(zfs_prop_t);
extern int zfs_prop_is_string(zfs_prop_t prop);
extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
+extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
typedef struct zprop_list {
int pl_prop;
@@ -386,10 +427,11 @@ typedef struct zprop_list {
struct zprop_list *pl_next;
boolean_t pl_all;
size_t pl_width;
+ size_t pl_recvd_width;
boolean_t pl_fixed;
} zprop_list_t;
-extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
+extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t);
extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
#define ZFS_MOUNTPOINT_NONE "none"
@@ -413,13 +455,24 @@ extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
zfs_type_t);
extern void zprop_free_list(zprop_list_t *);
+/*
+ * Number of output columns; sizes cb_columns[] in zprop_get_cbdata_t
+ * (cb_colwidths[] there has ZFS_GET_NCOLS + 1 entries).
+ */
+#define ZFS_GET_NCOLS 5
+
+/*
+ * Column identifiers stored in cb_columns[]. GET_COL_NONE is 0 and is
+ * followed by ZFS_GET_NCOLS real columns numbered 1 through 5.
+ * NOTE(review): GET_COL_NONE presumably marks an unused cb_columns[] slot,
+ * and the 1-based values presumably index cb_colwidths[] directly - confirm.
+ */
+typedef enum {
+ GET_COL_NONE,
+ GET_COL_NAME,
+ GET_COL_PROPERTY,
+ GET_COL_VALUE,
+ GET_COL_RECVD, /* presumably the received value (cf. zfs_prop_get_recvd) - confirm */
+ GET_COL_SOURCE
+} zfs_get_column_t;
+
/*
* Functions for printing zfs or zpool properties
*/
typedef struct zprop_get_cbdata {
int cb_sources;
- int cb_columns[4];
- int cb_colwidths[5];
+ zfs_get_column_t cb_columns[ZFS_GET_NCOLS];
+ int cb_colwidths[ZFS_GET_NCOLS + 1];
boolean_t cb_scripted;
boolean_t cb_literal;
boolean_t cb_first;
@@ -428,12 +481,8 @@ typedef struct zprop_get_cbdata {
} zprop_get_cbdata_t;
void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
- const char *, const char *, zprop_source_t, const char *);
-
-#define GET_COL_NAME 1
-#define GET_COL_PROPERTY 2
-#define GET_COL_VALUE 3
-#define GET_COL_SOURCE 4
+ const char *, const char *, zprop_source_t, const char *,
+ const char *);
/*
* Iterator functions.
@@ -444,6 +493,18 @@ extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
+extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *);
+
+/*
+ * Collection state taken by libzfs_add_handle() together with the dataset
+ * handle to add.
+ * NOTE(review): the field roles below are inferred from names and types
+ * only; confirm against the libzfs_add_handle() implementation.
+ */
+typedef struct get_all_cb {
+ zfs_handle_t **cb_handles; /* array of dataset handle pointers */
+ size_t cb_alloc; /* presumably slots allocated in cb_handles */
+ size_t cb_used; /* presumably slots of cb_handles in use */
+ boolean_t cb_verbose; /* presumably enables progress output - confirm */
+ int (*cb_getone)(zfs_handle_t *, void *); /* per-dataset callback */
+} get_all_cb_t;
+
+void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
+int libzfs_dataset_cmp(const void *, const void *);
/*
* Functions to create and destroy datasets.
@@ -451,21 +512,54 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
nvlist_t *);
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
-extern int zfs_destroy(zfs_handle_t *);
-extern int zfs_destroy_snaps(zfs_handle_t *, char *);
+extern int zfs_destroy(zfs_handle_t *, boolean_t);
+extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
-extern int zfs_send(zfs_handle_t *, const char *, const char *,
- boolean_t, boolean_t, boolean_t, boolean_t, int);
+
+/*
+ * Options passed by value to zfs_send().  The flags are unsigned so that a
+ * set flag reads back as 1: whether a plain 'int' bit-field is signed is
+ * implementation-defined, and a signed 1-bit field holds only 0 and -1.
+ */
+typedef struct sendflags {
+	/* print informational messages (ie, -v was specified) */
+	unsigned int verbose : 1;
+
+	/* recursive send (ie, -R) */
+	unsigned int replicate : 1;
+
+	/* for incrementals, do all intermediate snapshots (ie, -I) */
+	unsigned int doall : 1;
+
+	/* if dataset is a clone, do incremental from its origin */
+	unsigned int fromorigin : 1;
+
+	/* do deduplication */
+	unsigned int dedup : 1;
+
+	/* send properties (ie, -p) */
+	unsigned int props : 1;
+} sendflags_t;
+
+typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
+
+extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+ sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
+ void *cb_arg, nvlist_t **debugnvp);
+
extern int zfs_promote(zfs_handle_t *);
+extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
+ boolean_t, boolean_t, int, uint64_t, uint64_t);
+extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
+extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
+extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
uid_t rid, uint64_t space);
-extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
- zfs_userspace_cb_t func, void *arg);
+extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t,
+ zfs_userspace_cb_t, void *);
+
+extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **);
+extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *);
typedef struct recvflags {
/* print informational messages (ie, -v was specified) */
@@ -474,6 +568,12 @@ typedef struct recvflags {
/* the destination is a prefix, not the exact fs (ie, -d) */
int isprefix : 1;
+ /*
+ * Only the tail of the sent snapshot path is appended to the
+ * destination to determine the received snapshot name (ie, -e).
+ */
+ int istail : 1;
+
/* do not actually do the recv, just check if it would work (ie, -n) */
int dryrun : 1;
@@ -493,6 +593,15 @@ typedef struct recvflags {
extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t,
int, avl_tree_t *);
+/*
+ * Option bits for zfs diff. Each value is a distinct single bit, so the
+ * flags can be OR'ed together.
+ * NOTE(review): presumably passed as the final int argument of
+ * zfs_show_diffs() - confirm against its implementation.
+ */
+typedef enum diff_flags {
+ ZFS_DIFF_PARSEABLE = 0x1,
+ ZFS_DIFF_TIMESTAMP = 0x2,
+ ZFS_DIFF_CLASSIFY = 0x4
+} diff_flags_t;
+
+extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *,
+ int);
+
/*
* Miscellaneous functions.
*/
@@ -534,12 +643,6 @@ extern int zfs_unshareall_nfs(zfs_handle_t *);
extern int zfs_unshareall_smb(zfs_handle_t *);
extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
extern int zfs_unshareall(zfs_handle_t *);
-extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
-extern int zfs_share_iscsi(zfs_handle_t *);
-extern int zfs_unshare_iscsi(zfs_handle_t *);
-#ifdef TODO
-extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
-#endif
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
void *, void *, int, zfs_share_op_t);
@@ -572,15 +675,10 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
boolean_t *);
/*
- * ftyp special. Read the label from a given device.
+ * Label manipulation.
*/
extern int zpool_read_label(int, nvlist_t **);
-
-/*
- * Create and remove zvol /dev links.
- */
-extern int zpool_create_zvol_links(zpool_handle_t *);
-extern int zpool_remove_zvol_links(zpool_handle_t *);
+extern int zpool_clear_label(int);
/* is this zvol valid for use as a dump device? */
extern int zvol_check_dump_config(char *);
@@ -601,10 +699,21 @@ int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
-#ifdef __FreeBSD__
+/*
+ * Mappings between vdev and FRU.
+ */
+extern void libzfs_fru_refresh(libzfs_handle_t *);
+extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *);
+extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *);
+extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *,
+ const char *);
+extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
+extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
+
+#ifndef sun
extern int zmount(const char *, const char *, int, char *, char *, int, char *,
int);
-#endif
+#endif /* !sun */
#ifdef __cplusplus
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
index 6fa1967..4328d38 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_changelist.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Portions Copyright 2007 Ramprakash Jelari
@@ -116,32 +116,7 @@ changelist_prefix(prop_changelist_t *clp)
if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
continue;
- if (ZFS_IS_VOLUME(cn->cn_handle)) {
- switch (clp->cl_realprop) {
- case ZFS_PROP_NAME:
- /*
- * If this was a rename, unshare the zvol, and
- * remove the /dev/zvol links.
- */
- (void) zfs_unshare_iscsi(cn->cn_handle);
-
- if (zvol_remove_link(cn->cn_handle->zfs_hdl,
- cn->cn_handle->zfs_name) != 0) {
- ret = -1;
- cn->cn_needpost = B_FALSE;
- (void) zfs_share_iscsi(cn->cn_handle);
- }
- break;
-
- case ZFS_PROP_VOLSIZE:
- /*
- * If this was a change to the volume size, we
- * need to unshare and reshare the volume.
- */
- (void) zfs_unshare_iscsi(cn->cn_handle);
- break;
- }
- } else {
+ if (!ZFS_IS_VOLUME(cn->cn_handle)) {
/*
* Do the property specific processing.
*/
@@ -234,32 +209,8 @@ changelist_postfix(prop_changelist_t *clp)
zfs_refresh_properties(cn->cn_handle);
- if (ZFS_IS_VOLUME(cn->cn_handle)) {
- /*
- * If we're doing a rename, recreate the /dev/zvol
- * links.
- */
- if (clp->cl_realprop == ZFS_PROP_NAME &&
- zvol_create_link(cn->cn_handle->zfs_hdl,
- cn->cn_handle->zfs_name) != 0) {
- errors++;
- } else if (cn->cn_shared ||
- clp->cl_prop == ZFS_PROP_SHAREISCSI) {
- if (zfs_prop_get(cn->cn_handle,
- ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0,
- B_FALSE) == 0 &&
- strcmp(shareopts, "off") == 0) {
- errors +=
- zfs_unshare_iscsi(cn->cn_handle);
- } else {
- errors +=
- zfs_share_iscsi(cn->cn_handle);
- }
- }
-
+ if (ZFS_IS_VOLUME(cn->cn_handle))
continue;
- }
/*
* Remount if previously mounted or mountpoint was legacy,
@@ -508,6 +459,14 @@ change_one(zfs_handle_t *zhp, void *data)
&idx);
uu_list_insert(clp->cl_list, cn, idx);
} else {
+ /*
+ * Add this child to beginning of the list. Children
+ * below this one in the hierarchy will get added above
+ * this one in the list. This produces a list in
+ * reverse dataset name order.
+ * This is necessary when the original mountpoint
+ * is legacy or none.
+ */
ASSERT(!clp->cl_alldependents);
verify(uu_list_insert_before(clp->cl_list,
uu_list_first(clp->cl_list), cn) == 0);
@@ -574,6 +533,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
zfs_handle_t *temp;
char property[ZFS_MAXPROPLEN];
uu_compare_fn_t *compare = NULL;
+ boolean_t legacy = B_FALSE;
if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
return (NULL);
@@ -586,8 +546,19 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
prop == ZFS_PROP_SHARESMB) {
- compare = compare_mountpoints;
- clp->cl_sorted = B_TRUE;
+
+ if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
+ property, sizeof (property),
+ NULL, NULL, 0, B_FALSE) == 0 &&
+ (strcmp(property, "legacy") == 0 ||
+ strcmp(property, "none") == 0)) {
+
+ legacy = B_TRUE;
+ }
+ if (!legacy) {
+ compare = compare_mountpoints;
+ clp->cl_sorted = B_TRUE;
+ }
}
clp->cl_pool = uu_list_pool_create("changelist_pool",
@@ -638,8 +609,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
clp->cl_prop != ZFS_PROP_SHARENFS &&
- clp->cl_prop != ZFS_PROP_SHARESMB &&
- clp->cl_prop != ZFS_PROP_SHAREISCSI)
+ clp->cl_prop != ZFS_PROP_SHARESMB)
return (clp);
/*
@@ -695,6 +665,12 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
uu_list_insert(clp->cl_list, cn, idx);
} else {
+ /*
+ * Add the target dataset to the end of the list.
+ * The list is not really unsorted. The list will be
+ * in reverse dataset name order. This is necessary
+ * when the original mountpoint is legacy or none.
+ */
verify(uu_list_insert_after(clp->cl_list,
uu_list_last(clp->cl_list), cn) == 0);
}
@@ -703,11 +679,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
* If the mountpoint property was previously 'legacy', or 'none',
* record it as the behavior of changelist_postfix() will be different.
*/
- if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) &&
- (zfs_prop_get(zhp, prop, property, sizeof (property),
- NULL, NULL, 0, B_FALSE) == 0 &&
- (strcmp(property, "legacy") == 0 ||
- strcmp(property, "none") == 0))) {
+ if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) && legacy) {
/*
* do not automatically mount ex-legacy datasets if
* we specifically set canmount to noauto
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
index 94640d1..dc27238 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_config.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* The pool configuration repository is stored in /etc/zfs/zpool.cache as a
* single packed nvlist. While it would be nice to just read in this
@@ -313,21 +311,33 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
zpool_handle_t *zhp;
int ret;
- if (namespace_reload(hdl) != 0)
+ /*
+ * If someone makes a recursive call to zpool_iter(), we want to avoid
+ * refreshing the namespace because that will invalidate the parent
+ * context. We allow recursive calls, but simply re-use the same
+ * namespace AVL tree.
+ */
+ if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0)
return (-1);
+ hdl->libzfs_pool_iter++;
for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
- if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
+ if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) {
+ hdl->libzfs_pool_iter--;
return (-1);
+ }
if (zhp == NULL)
continue;
- if ((ret = func(zhp, data)) != 0)
+ if ((ret = func(zhp, data)) != 0) {
+ hdl->libzfs_pool_iter--;
return (ret);
+ }
}
+ hdl->libzfs_pool_iter--;
return (0);
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
index 803746a..824834e 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
@@ -20,11 +20,10 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
-#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
@@ -38,13 +37,13 @@
#include <fcntl.h>
#include <sys/mntent.h>
#include <sys/mount.h>
-#include <sys/avl.h>
#include <priv.h>
#include <pwd.h>
#include <grp.h>
#include <stddef.h>
#include <idmap.h>
+#include <sys/dnode.h>
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/misc.h>
@@ -55,7 +54,6 @@
#include "libzfs_impl.h"
#include "zfs_deleg.h"
-static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
static int userquota_propname_decode(const char *propname, boolean_t zoned,
zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
@@ -126,13 +124,14 @@ path_to_str(const char *path, int types)
* provide a more meaningful error message. We call zfs_error_aux() to
* explain exactly why the name was not valid.
*/
-static int
+int
zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
boolean_t modifying)
{
namecheck_err_t why;
char what;
+ (void) zfs_prop_get_table();
if (dataset_namecheck(path, &why, &what) != 0) {
if (hdl != NULL) {
switch (why) {
@@ -318,6 +317,7 @@ zpool_free_handles(libzfs_handle_t *hdl)
/*
* Utility function to gather stats (objset and zpl) for the given object.
*/
+static int
get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
libzfs_handle_t *hdl = zhp->zfs_hdl;
@@ -336,6 +336,44 @@ get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
return (0);
}
+/*
+ * Fetch the received properties of the dataset behind 'zhp' through the
+ * ZFS_IOC_OBJSET_RECVD_PROPS ioctl and store the resulting nvlist in
+ * zhp->zfs_recvd_props, freeing whatever list was stored there before.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int
+get_recvd_props_ioctl(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	nvlist_t *props;
+	int rv;
+
+	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
+		return (-1);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	/* Keep retrying for as long as the only failure is ENOMEM. */
+	for (;;) {
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS,
+		    &zc) == 0)
+			break;
+
+		if (errno != ENOMEM) {
+			zcmd_free_nvlists(&zc);
+			return (-1);
+		}
+
+		/*
+		 * NOTE(review): presumably this enlarges the destination
+		 * buffer before the next attempt -- confirm against
+		 * zcmd_expand_dst_nvlist().
+		 */
+		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0)
+			return (-1);
+	}
+
+	rv = zcmd_read_dst_nvlist(hdl, &zc, &props);
+	zcmd_free_nvlists(&zc);
+	if (rv != 0)
+		return (-1);
+
+	/* Swap the freshly read list in for the old one. */
+	nvlist_free(zhp->zfs_recvd_props);
+	zhp->zfs_recvd_props = props;
+
+	return (0);
+}
+
static int
put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
@@ -397,70 +435,8 @@ zfs_refresh_properties(zfs_handle_t *zhp)
static int
make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
- char *logstr;
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- /*
- * Preserve history log string.
- * any changes performed here will be
- * logged as an internal event.
- */
- logstr = zhp->zfs_hdl->libzfs_log_str;
- zhp->zfs_hdl->libzfs_log_str = NULL;
-
-top:
- if (put_stats_zhdl(zhp, zc) != 0) {
- zhp->zfs_hdl->libzfs_log_str = logstr;
+ if (put_stats_zhdl(zhp, zc) != 0)
return (-1);
- }
-
-
- if (zhp->zfs_dmustats.dds_inconsistent) {
- zfs_cmd_t zc2 = { 0 };
-
- /*
- * If it is dds_inconsistent, then we've caught it in
- * the middle of a 'zfs receive' or 'zfs destroy', and
- * it is inconsistent from the ZPL's point of view, so
- * can't be mounted. However, it could also be that we
- * have crashed in the middle of one of those
- * operations, in which case we need to get rid of the
- * inconsistent state. We do that by either rolling
- * back to the previous snapshot (which will fail if
- * there is none), or destroying the filesystem. Note
- * that if we are still in the middle of an active
- * 'receive' or 'destroy', then the rollback and destroy
- * will fail with EBUSY and we will drive on as usual.
- */
-
- (void) strlcpy(zc2.zc_name, zhp->zfs_name,
- sizeof (zc2.zc_name));
-
- if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
- (void) zvol_remove_link(hdl, zhp->zfs_name);
- zc2.zc_objset_type = DMU_OST_ZVOL;
- } else {
- zc2.zc_objset_type = DMU_OST_ZFS;
- }
-
- /*
- * If we can successfully destroy it, pretend that it
- * never existed.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) {
- zhp->zfs_hdl->libzfs_log_str = logstr;
- errno = ENOENT;
- return (-1);
- }
- /* If we can successfully roll it back, reset the stats */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) {
- if (get_stats_ioctl(zhp, zc) != 0) {
- zhp->zfs_hdl->libzfs_log_str = logstr;
- return (-1);
- }
- goto top;
- }
- }
/*
* We've managed to open the dataset and gather statistics. Determine
@@ -482,8 +458,9 @@ top:
else
abort(); /* we should never see any other types */
- zhp->zfs_hdl->libzfs_log_str = logstr;
- zhp->zpool_hdl = zpool_handle(zhp);
+ if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL)
+ return (-1);
+
return (0);
}
@@ -585,6 +562,7 @@ zfs_close(zfs_handle_t *zhp)
free(zhp->zfs_mntopts);
nvlist_free(zhp->zfs_props);
nvlist_free(zhp->zfs_user_props);
+ nvlist_free(zhp->zfs_recvd_props);
free(zhp);
}
@@ -878,9 +856,14 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
goto error;
}
+ /*
+ * Encode the prop name as
+ * userquota@<hex-rid>-domain, to make it easy
+ * for the kernel to decode.
+ */
(void) snprintf(newpropname, sizeof (newpropname),
- "%s%s", zfs_userquota_prop_prefixes[uqtype],
- domain);
+ "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype],
+ (longlong_t)rid, domain);
valary[0] = uqtype;
valary[1] = rid;
valary[2] = intval;
@@ -956,19 +939,66 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
}
break;
- case ZFS_PROP_SHAREISCSI:
- if (strcmp(strval, "off") != 0 &&
- strcmp(strval, "on") != 0 &&
- strcmp(strval, "type=disk") != 0) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' must be 'on', 'off', or 'type=disk'"),
- propname);
- (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
- goto error;
+ case ZFS_PROP_MLSLABEL:
+ {
+#ifdef sun
+ /*
+ * Verify the mlslabel string and convert to
+ * internal hex label string.
+ */
+
+ m_label_t *new_sl;
+ char *hex = NULL; /* internal label string */
+
+ /* Default value is already OK. */
+ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
+ break;
+
+ /* Verify the label can be converted to binary form */
+ if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) ||
+ (str_to_label(strval, &new_sl, MAC_LABEL,
+ L_NO_CORRECTION, NULL) == -1)) {
+ goto badlabel;
+ }
+
+ /* Now translate to hex internal label string */
+ if (label_to_str(new_sl, &hex, M_INTERNAL,
+ DEF_NAMES) != 0) {
+ if (hex)
+ free(hex);
+ goto badlabel;
}
+ m_label_free(new_sl);
+
+ /* If string is already in internal form, we're done. */
+ if (strcmp(strval, hex) == 0) {
+ free(hex);
+ break;
+ }
+
+ /* Replace the label string with the internal form. */
+ (void) nvlist_remove(ret, zfs_prop_to_name(prop),
+ DATA_TYPE_STRING);
+ verify(nvlist_add_string(ret, zfs_prop_to_name(prop),
+ hex) == 0);
+ free(hex);
break;
+badlabel:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "invalid mlslabel '%s'"), strval);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ m_label_free(new_sl); /* OK if null */
+#else /* !sun */
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "mlslabel is not supported on FreeBSD"));
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+#endif /* !sun */
+ goto error;
+
+ }
+
case ZFS_PROP_MOUNTPOINT:
{
namecheck_err_t why;
@@ -1187,39 +1217,130 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
+ return (ret);
+
+error:
+ nvlist_free(ret);
+ return (NULL);
+}
+
+int
+zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl)
+{
+ uint64_t old_volsize;
+ uint64_t new_volsize;
+ uint64_t old_reservation;
+ uint64_t new_reservation;
+ zfs_prop_t resv_prop;
/*
* If this is an existing volume, and someone is setting the volsize,
* make sure that it matches the reservation, or add it if necessary.
*/
- if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
- nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
- &intval) == 0) {
- uint64_t old_volsize = zfs_prop_get_int(zhp,
- ZFS_PROP_VOLSIZE);
- uint64_t old_reservation;
- uint64_t new_reservation;
- zfs_prop_t resv_prop;
+ old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
+ if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
+ return (-1);
+ old_reservation = zfs_prop_get_int(zhp, resv_prop);
+ if ((zvol_volsize_to_reservation(old_volsize, zhp->zfs_props) !=
+ old_reservation) || nvlist_lookup_uint64(nvl,
+ zfs_prop_to_name(resv_prop), &new_reservation) != ENOENT) {
+ return (0);
+ }
+ if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+ &new_volsize) != 0)
+ return (-1);
+ new_reservation = zvol_volsize_to_reservation(new_volsize,
+ zhp->zfs_props);
+ if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop),
+ new_reservation) != 0) {
+ (void) no_memory(zhp->zfs_hdl);
+ return (-1);
+ }
+ return (1);
+}
- if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
- goto error;
- old_reservation = zfs_prop_get_int(zhp, resv_prop);
+void
+zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
+ char *errbuf)
+{
+ switch (err) {
- if (old_volsize == old_reservation &&
- nvlist_lookup_uint64(ret, zfs_prop_to_name(resv_prop),
- &new_reservation) != 0) {
- if (nvlist_add_uint64(ret,
- zfs_prop_to_name(resv_prop), intval) != 0) {
- (void) no_memory(hdl);
- goto error;
- }
+ case ENOSPC:
+ /*
+ * For quotas and reservations, ENOSPC indicates
+ * something different; setting a quota or reservation
+ * doesn't use any disk space.
+ */
+ switch (prop) {
+ case ZFS_PROP_QUOTA:
+ case ZFS_PROP_REFQUOTA:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "size is less than current used or "
+ "reserved space"));
+ (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+ break;
+
+ case ZFS_PROP_RESERVATION:
+ case ZFS_PROP_REFRESERVATION:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "size is greater than available space"));
+ (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
+ break;
+
+ default:
+ (void) zfs_standard_error(hdl, err, errbuf);
+ break;
}
- }
- return (ret);
+ break;
-error:
- nvlist_free(ret);
- return (NULL);
+ case EBUSY:
+ (void) zfs_standard_error(hdl, EBUSY, errbuf);
+ break;
+
+ case EROFS:
+ (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
+ break;
+
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool and or dataset must be upgraded to set this "
+ "property or value"));
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+
+ case ERANGE:
+ if (prop == ZFS_PROP_COMPRESSION) {
+ (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "property setting is not allowed on "
+ "bootable datasets"));
+ (void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+ } else {
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
+ break;
+
+ case EINVAL:
+ if (prop == ZPROP_INVAL) {
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ } else {
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
+ break;
+
+ case EOVERFLOW:
+ /*
+ * This platform can't address a volume this big.
+ */
+#ifdef _ILP32
+ if (prop == ZFS_PROP_VOLSIZE) {
+ (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
+ break;
+ }
+#endif
+ /* FALLTHROUGH */
+ default:
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
}
/*
@@ -1237,6 +1358,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
zfs_prop_t prop;
boolean_t do_prefix;
uint64_t idx;
+ int added_resv;
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
@@ -1260,17 +1382,22 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
/* We don't support those properties on FreeBSD. */
switch (prop) {
case ZFS_PROP_DEVICES:
- case ZFS_PROP_SHAREISCSI:
case ZFS_PROP_ISCSIOPTIONS:
case ZFS_PROP_XATTR:
case ZFS_PROP_VSCAN:
case ZFS_PROP_NBMAND:
+ case ZFS_PROP_MLSLABEL:
(void) snprintf(errbuf, sizeof (errbuf),
"property '%s' not supported on FreeBSD", propname);
ret = zfs_error(hdl, EZFS_PERM, errbuf);
goto error;
}
+ if (prop == ZFS_PROP_VOLSIZE) {
+ if ((added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1)
+ goto error;
+ }
+
if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
goto error;
@@ -1304,78 +1431,22 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
if (ret != 0) {
- switch (errno) {
-
- case ENOSPC:
- /*
- * For quotas and reservations, ENOSPC indicates
- * something different; setting a quota or reservation
- * doesn't use any disk space.
- */
- switch (prop) {
- case ZFS_PROP_QUOTA:
- case ZFS_PROP_REFQUOTA:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "size is less than current used or "
- "reserved space"));
- (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
- break;
-
- case ZFS_PROP_RESERVATION:
- case ZFS_PROP_REFRESERVATION:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "size is greater than available space"));
- (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
- break;
-
- default:
- (void) zfs_standard_error(hdl, errno, errbuf);
- break;
- }
- break;
-
- case EBUSY:
- if (prop == ZFS_PROP_VOLBLOCKSIZE)
- (void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
- else
- (void) zfs_standard_error(hdl, EBUSY, errbuf);
- break;
-
- case EROFS:
- (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
- break;
-
- case ENOTSUP:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "pool and or dataset must be upgraded to set this "
- "property or value"));
- (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
- break;
-
- case ERANGE:
- if (prop == ZFS_PROP_COMPRESSION) {
- (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "property setting is not allowed on "
- "bootable datasets"));
- (void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
- } else {
- (void) zfs_standard_error(hdl, errno, errbuf);
- }
- break;
-
- case EOVERFLOW:
- /*
- * This platform can't address a volume this big.
- */
-#ifdef _ILP32
- if (prop == ZFS_PROP_VOLSIZE) {
- (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
- break;
- }
-#endif
- /* FALLTHROUGH */
- default:
- (void) zfs_standard_error(hdl, errno, errbuf);
+ zfs_setprop_error(hdl, prop, errno, errbuf);
+ if (added_resv && errno == ENOSPC) {
+ /* clean up the volsize property we tried to set */
+ uint64_t old_volsize = zfs_prop_get_int(zhp,
+ ZFS_PROP_VOLSIZE);
+ nvlist_free(nvl);
+ zcmd_free_nvlists(&zc);
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
+ goto error;
+ if (nvlist_add_uint64(nvl,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+ old_volsize) != 0)
+ goto error;
+ if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0)
+ goto error;
+ (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
}
} else {
if (do_prefix)
@@ -1398,10 +1469,11 @@ error:
}
/*
- * Given a property, inherit the value from the parent dataset.
+ * Given a property, inherit the value from the parent dataset, or if received
+ * is TRUE, revert to the received value, if any.
*/
int
-zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
+zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received)
{
zfs_cmd_t zc = { 0 };
int ret;
@@ -1413,6 +1485,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot inherit %s for '%s'"), propname, zhp->zfs_name);
+ zc.zc_cookie = received;
if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
/*
* For user properties, the amount of work we have to do is very
@@ -1439,7 +1512,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
if (zfs_prop_readonly(prop))
return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
- if (!zfs_prop_inheritable(prop))
+ if (!zfs_prop_inheritable(prop) && !received)
return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
/*
@@ -1449,7 +1522,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
/*
- * Normalize the name, to get rid of shorthand abbrevations.
+ * Normalize the name, to get rid of shorthand abbreviations.
*/
propname = zfs_prop_to_name(prop);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -1544,6 +1617,26 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
return (value);
}
+/*
+ * Report whether the handle's active property list (zfs_props) is currently
+ * the very same nvlist as its received-property list (zfs_recvd_props).
+ */
+static boolean_t
+zfs_is_recvd_props_mode(zfs_handle_t *zhp)
+{
+	nvlist_t *active = zhp->zfs_props;
+
+	return (active == zhp->zfs_recvd_props);
+}
+
+/*
+ * Point zhp->zfs_props at the received-property list, handing the previous
+ * zfs_props pointer back to the caller through *cookie so that
+ * zfs_unset_recvd_props_mode() can restore it later.
+ */
+static void
+zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie)
+{
+	nvlist_t *saved = zhp->zfs_props;
+
+	zhp->zfs_props = zhp->zfs_recvd_props;
+	*cookie = (uint64_t)(uintptr_t)saved;
+}
+
+/*
+ * Restore zhp->zfs_props from the pointer value held in *cookie (as stored
+ * by zfs_set_recvd_props_mode()) and reset the cookie to 0.
+ */
+static void
+zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie)
+{
+	uint64_t saved = *cookie;
+
+	*cookie = 0;
+	zhp->zfs_props = (nvlist_t *)(uintptr_t)saved;
+}
+
/*
* Internal function for getting a numeric property. Both zfs_prop_get() and
* zfs_prop_get_int() are built using this interface.
@@ -1562,6 +1655,7 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
struct mnttab mnt;
char *mntopt_on = NULL;
char *mntopt_off = NULL;
+ boolean_t received = zfs_is_recvd_props_mode(zhp);
*source = NULL;
@@ -1637,6 +1731,9 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
case ZFS_PROP_NBMAND:
*val = getprop_uint64(zhp, prop, source);
+ if (received)
+ break;
+
if (hasmntopt(&mnt, mntopt_on) && !*val) {
*val = B_TRUE;
if (src)
@@ -1649,22 +1746,17 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
break;
case ZFS_PROP_CANMOUNT:
- *val = getprop_uint64(zhp, prop, source);
- if (*val != ZFS_CANMOUNT_ON)
- *source = zhp->zfs_name;
- else
- *source = ""; /* default */
- break;
-
+ case ZFS_PROP_VOLSIZE:
case ZFS_PROP_QUOTA:
case ZFS_PROP_REFQUOTA:
case ZFS_PROP_RESERVATION:
case ZFS_PROP_REFRESERVATION:
*val = getprop_uint64(zhp, prop, source);
- if (*val == 0)
- *source = ""; /* default */
- else
+
+ if (*source == NULL) {
+ /* not default, must be local */
*source = zhp->zfs_name;
+ }
break;
case ZFS_PROP_MOUNTED:
@@ -1685,21 +1777,13 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) {
zcmd_free_nvlists(&zc);
- zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
- "unable to get %s property"),
- zfs_prop_to_name(prop));
- return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
- dgettext(TEXT_DOMAIN, "internal error")));
+ return (-1);
}
if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 ||
nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop),
val) != 0) {
zcmd_free_nvlists(&zc);
- zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
- "unable to get %s property"),
- zfs_prop_to_name(prop));
- return (zfs_error(zhp->zfs_hdl, EZFS_NOMEM,
- dgettext(TEXT_DOMAIN, "internal error")));
+ return (-1);
}
if (zplprops)
nvlist_free(zplprops);
@@ -1714,11 +1798,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
/*
* If we tried to use a default value for a
* readonly property, it means that it was not
- * present; return an error.
+ * present.
*/
if (zfs_prop_readonly(prop) &&
- *source && (*source)[0] == '\0') {
- return (-1);
+ *source != NULL && (*source)[0] == '\0') {
+ *source = NULL;
}
break;
@@ -1748,6 +1832,8 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
*srctype = ZPROP_SRC_NONE;
} else if (source[0] == '\0') {
*srctype = ZPROP_SRC_DEFAULT;
+ } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) {
+ *srctype = ZPROP_SRC_RECEIVED;
} else {
if (strcmp(source, zhp->zfs_name) == 0) {
*srctype = ZPROP_SRC_LOCAL;
@@ -1759,6 +1845,43 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
}
+/*
+ * Copy the received value of property 'propname' into 'propbuf' (at most
+ * 'proplen' bytes).
+ *
+ * The received-property list is loaded on first use.  A name that maps to
+ * a known property is formatted by zfs_prop_get() while the handle is
+ * temporarily switched to received-properties mode; 'literal' is passed
+ * through to that call.  Userquota-style names are rejected.  Any other
+ * name is looked up directly in the received list and its ZPROP_VALUE
+ * string is copied out.
+ *
+ * Returns 0 on success, -1 if the property has no received value or could
+ * not be retrieved.
+ */
+int
+zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf,
+    size_t proplen, boolean_t literal)
+{
+	zfs_prop_t prop;
+	nvlist_t *entry;
+	char *value;
+	uint64_t cookie;
+	int rv;
+
+	if (zhp->zfs_recvd_props == NULL &&
+	    get_recvd_props_ioctl(zhp) != 0)
+		return (-1);
+
+	prop = zfs_name_to_prop(propname);
+
+	if (prop != ZPROP_INVAL) {
+		if (!nvlist_exists(zhp->zfs_recvd_props, propname))
+			return (-1);
+
+		/* Read through the received list, then switch back. */
+		zfs_set_recvd_props_mode(zhp, &cookie);
+		rv = zfs_prop_get(zhp, prop, propbuf, proplen,
+		    NULL, NULL, 0, literal);
+		zfs_unset_recvd_props_mode(zhp, &cookie);
+
+		return (rv == 0 ? 0 : -1);
+	}
+
+	if (zfs_prop_userquota(propname))
+		return (-1);
+
+	if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, propname,
+	    &entry) != 0)
+		return (-1);
+
+	verify(nvlist_lookup_string(entry, ZPROP_VALUE, &value) == 0);
+	(void) strlcpy(propbuf, value, proplen);
+
+	return (0);
+}
+
/*
* Retrieve a property from the given object. If 'literal' is specified, then
* numbers are left as exact values. Otherwise, numbers are converted to a
@@ -1774,6 +1897,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
uint64_t val;
char *str;
const char *strval;
+ boolean_t received = zfs_is_recvd_props_mode(zhp);
/*
* Check to see if this property applies to our object
@@ -1781,6 +1905,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
return (-1);
+ if (received && zfs_prop_readonly(prop))
+ return (-1);
+
if (src)
*src = ZPROP_SRC_NONE;
@@ -1820,10 +1947,22 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
if (str[0] == '/') {
char buf[MAXPATHLEN];
char *root = buf;
- const char *relpath = zhp->zfs_name + strlen(source);
+ const char *relpath;
- if (relpath[0] == '/')
- relpath++;
+ /*
+ * If we inherit the mountpoint, even from a dataset
+ * with a received value, the source will be the path of
+ * the dataset we inherit from. If source is
+ * ZPROP_SOURCE_VAL_RECVD, the received value is not
+ * inherited.
+ */
+ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) {
+ relpath = "";
+ } else {
+ relpath = zhp->zfs_name + strlen(source);
+ if (relpath[0] == '/')
+ relpath++;
+ }
if ((zpool_get_prop(zhp->zpool_hdl,
ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) ||
@@ -1902,8 +2041,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
case ZFS_PROP_COMPRESSRATIO:
if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
return (-1);
- (void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
- val / 100, (longlong_t)val % 100);
+ (void) snprintf(propbuf, proplen, "%llu.%02llux",
+ (u_longlong_t)(val / 100),
+ (u_longlong_t)(val % 100));
break;
case ZFS_PROP_TYPE:
@@ -1948,6 +2088,48 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
(void) strlcpy(propbuf, zhp->zfs_name, proplen);
break;
+ case ZFS_PROP_MLSLABEL:
+ {
+#ifdef sun
+ m_label_t *new_sl = NULL;
+ char *ascii = NULL; /* human readable label */
+
+ (void) strlcpy(propbuf,
+ getprop_string(zhp, prop, &source), proplen);
+
+ if (literal || (strcasecmp(propbuf,
+ ZFS_MLSLABEL_DEFAULT) == 0))
+ break;
+
+ /*
+ * Try to translate the internal hex string to
+ * human-readable output. If there are any
+ * problems just use the hex string.
+ */
+
+ if (str_to_label(propbuf, &new_sl, MAC_LABEL,
+ L_NO_CORRECTION, NULL) == -1) {
+ m_label_free(new_sl);
+ break;
+ }
+
+ if (label_to_str(new_sl, &ascii, M_LABEL,
+ DEF_NAMES) != 0) {
+ if (ascii)
+ free(ascii);
+ m_label_free(new_sl);
+ break;
+ }
+ m_label_free(new_sl);
+
+ (void) strlcpy(propbuf, ascii, proplen);
+ free(ascii);
+#else /* !sun */
+ propbuf[0] = '\0';
+#endif /* !sun */
+ }
+ break;
+
default:
switch (zfs_prop_get_type(prop)) {
case PROP_TYPE_NUMBER:
@@ -2044,14 +2226,11 @@ idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser,
char **domainp, idmap_rid_t *ridp)
{
#ifdef sun
- idmap_handle_t *idmap_hdl = NULL;
idmap_get_handle_t *get_hdl = NULL;
idmap_stat status;
int err = EINVAL;
- if (idmap_init(&idmap_hdl) != IDMAP_SUCCESS)
- goto out;
- if (idmap_get_create(idmap_hdl, &get_hdl) != IDMAP_SUCCESS)
+ if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS)
goto out;
if (isuser) {
@@ -2070,29 +2249,12 @@ idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser,
out:
if (get_hdl)
idmap_get_destroy(get_hdl);
- if (idmap_hdl)
- (void) idmap_fini(idmap_hdl);
return (err);
#else /* !sun */
assert(!"invalid code path");
#endif /* !sun */
}
-#ifndef sun
-/* Check if a string contains only digits */
-static int
-string_is_digits(char *cp)
-{
- int i;
-
- for(i = 0; i < strlen(cp); i++)
- if(!isdigit(cp[i]))
- return (0);
- return (1);
-}
-
-#endif /* !sun */
-
/*
* convert the propname into parameters needed by kernel
* Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829
@@ -2131,7 +2293,6 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
* turned into S-1-domainID-RID.
*/
directory_error_t e;
-
if (zoned && getzoneid() == GLOBAL_ZONEID)
return (ENOENT);
if (isuser) {
@@ -2150,7 +2311,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
cp = numericsid;
/* will be further decoded below */
#else /* !sun */
- return (ENOENT);
+ return (ENOENT);
#endif /* !sun */
}
@@ -2169,15 +2330,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
}
if (errno != 0 || *end != '\0')
return (EINVAL);
-#ifdef sun
} else if (!isdigit(*cp)) {
-#else /* sun */
- /*
- * In FreeBSD user and group names can begin with a digit so treat
- * as a uid/gid if string contains digits only
- */
- } else if (!string_is_digits(cp)) {
-#endif /* sun */
/*
* It's a user/group name (eg "user") that needs to be
* turned into a uid/gid
@@ -2309,13 +2462,6 @@ top:
(void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc);
- /*
- * FreeBSD compatibility with pre-v15 kernel module.
- * Ignore private dataset names.
- */
- if (strchr(zc->zc_name, '$') != NULL)
- rc = 0;
-
if (rc == -1) {
switch (errno) {
case ENOMEM:
@@ -2363,14 +2509,6 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT,
&zc)) == 0) {
-
- /*
- * FreeBSD compatibility with pre-v15 kernel module.
- * Ignore private dataset names.
- */
- if (strchr(zc.zc_name, '$') != NULL)
- continue;
-
/*
* Silently ignore errors, as the only plausible explanation is
* that the pool has since been removed.
@@ -2407,13 +2545,6 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT,
&zc)) == 0) {
- /*
- * FreeBSD compatibility with pre-v15 kernel module.
- * Ignore private dataset names.
- */
- if (strchr(zc.zc_name, '$') != NULL)
- continue;
-
if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
&zc)) == NULL) {
continue;
@@ -2443,6 +2574,27 @@ zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
}
/*
+ * Is one dataset name a child dataset of another?
+ *
+ * Needs to handle these cases:
+ * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo"
+ * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar"
+ * Descendant? No. No. No. Yes.
+ */
+static boolean_t
+is_descendant(const char *ds1, const char *ds2)
+{
+	size_t parent_len = strlen(ds1);
+
+	/* A name shorter than ds1 cannot lie underneath it. */
+	if (strlen(ds2) < parent_len)
+		return (B_FALSE);
+
+	/* The character right after the ds1-sized prefix must be '/'. */
+	if (ds2[parent_len] != '/')
+		return (B_FALSE);
+
+	/* Finally, that prefix has to be ds1 itself. */
+	return (strncmp(ds1, ds2, parent_len) == 0);
+}
+
+/*
* Given a complete name, return just the portion that refers to the parent.
* Can return NULL if this is a pool.
*/
@@ -2477,6 +2629,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
char *slash;
zfs_handle_t *zhp;
char errbuf[1024];
+ uint64_t is_zoned;
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot create '%s'"), path);
@@ -2519,9 +2672,12 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
return (zfs_standard_error(hdl, errno, errbuf));
}
- *zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+ is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
+ if (zoned != NULL)
+ *zoned = is_zoned;
+
/* we are in a non-global zone, but parent is in the global zone */
- if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
+ if (getzoneid() != GLOBAL_ZONEID && !is_zoned) {
(void) zfs_standard_error(hdl, EPERM, errbuf);
zfs_close(zhp);
return (-1);
@@ -2653,11 +2809,10 @@ int
zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
{
int prefix;
- uint64_t zoned;
char *path_copy;
int rc;
- if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
+ if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0)
return (-1);
if ((path_copy = strdup(path)) != NULL) {
@@ -2771,18 +2926,6 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
/* create the dataset */
ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
- if (ret == 0 && type == ZFS_TYPE_VOLUME) {
- ret = zvol_create_link(hdl, path);
- if (ret) {
- (void) zfs_standard_error(hdl, errno,
- dgettext(TEXT_DOMAIN,
- "Volume successfully created, but device links "
- "were not created"));
- zcmd_free_nvlists(&zc);
- return (-1);
- }
- }
-
zcmd_free_nvlists(&zc);
/* check for failure */
@@ -2838,30 +2981,19 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
* isn't mounted, and that there are no active dependents.
*/
int
-zfs_destroy(zfs_handle_t *zhp)
+zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
{
zfs_cmd_t zc = { 0 };
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
if (ZFS_IS_VOLUME(zhp)) {
- /*
- * If user doesn't have permissions to unshare volume, then
- * abort the request. This would only happen for a
- * non-privileged user.
- */
- if (zfs_unshare_iscsi(zhp) != 0) {
- return (-1);
- }
-
- if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
- return (-1);
-
zc.zc_objset_type = DMU_OST_ZVOL;
} else {
zc.zc_objset_type = DMU_OST_ZFS;
}
+ zc.zc_defer_destroy = defer;
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) {
return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
@@ -2880,13 +3012,13 @@ struct destroydata {
};
static int
-zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
+zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
{
struct destroydata *dd = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
boolean_t closezhp = dd->closezhp;
- int rv;
+ int rv = 0;
(void) strlcpy(name, zhp->zfs_name, sizeof (name));
(void) strlcat(name, "@", sizeof (name));
@@ -2898,17 +3030,9 @@ zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
zfs_close(szhp);
}
- if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
- (void) zvol_remove_link(zhp->zfs_hdl, name);
- /*
- * NB: this is simply a best-effort. We don't want to
- * return an error, because then we wouldn't visit all
- * the volumes.
- */
- }
-
dd->closezhp = B_TRUE;
- rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
+ if (!dd->gotone)
+ rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
if (closezhp)
zfs_close(zhp);
return (rv);
@@ -2918,14 +3042,14 @@ zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
* Destroys all snapshots with the given name in zhp & descendants.
*/
int
-zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
{
zfs_cmd_t zc = { 0 };
int ret;
struct destroydata dd = { 0 };
dd.snapname = snapname;
- (void) zfs_remove_link_cb(zhp, &dd);
+ (void) zfs_check_snap_cb(zhp, &dd);
if (!dd.gotone) {
return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
@@ -2935,6 +3059,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+ zc.zc_defer_destroy = defer;
ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc);
if (ret != 0) {
@@ -3042,70 +3167,11 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
return (zfs_standard_error(zhp->zfs_hdl, errno,
errbuf));
}
- } else if (ZFS_IS_VOLUME(zhp)) {
- ret = zvol_create_link(zhp->zfs_hdl, target);
}
return (ret);
}
-typedef struct promote_data {
- char cb_mountpoint[MAXPATHLEN];
- const char *cb_target;
- const char *cb_errbuf;
- uint64_t cb_pivot_txg;
-} promote_data_t;
-
-static int
-promote_snap_cb(zfs_handle_t *zhp, void *data)
-{
- promote_data_t *pd = data;
- zfs_handle_t *szhp;
- char snapname[MAXPATHLEN];
- int rv = 0;
-
- /* We don't care about snapshots after the pivot point */
- if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
- zfs_close(zhp);
- return (0);
- }
-
- /* Remove the device link if it's a zvol. */
- if (ZFS_IS_VOLUME(zhp))
- (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
-
- /* Check for conflicting names */
- (void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
- (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
- szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
- if (szhp != NULL) {
- zfs_close(szhp);
- zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
- "snapshot name '%s' from origin \n"
- "conflicts with '%s' from target"),
- zhp->zfs_name, snapname);
- rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
- }
- zfs_close(zhp);
- return (rv);
-}
-
-static int
-promote_snap_done_cb(zfs_handle_t *zhp, void *data)
-{
- promote_data_t *pd = data;
-
- /* We don't care about snapshots after the pivot point */
- if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
- /* Create the device link if it's a zvol. */
- if (ZFS_IS_VOLUME(zhp))
- (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
- }
-
- zfs_close(zhp);
- return (0);
-}
-
/*
* Promotes the given clone fs to be the clone parent.
*/
@@ -3115,10 +3181,7 @@ zfs_promote(zfs_handle_t *zhp)
libzfs_handle_t *hdl = zhp->zfs_hdl;
zfs_cmd_t zc = { 0 };
char parent[MAXPATHLEN];
- char *cp;
int ret;
- zfs_handle_t *pzhp;
- promote_data_t pd;
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3136,29 +3199,7 @@ zfs_promote(zfs_handle_t *zhp)
"not a cloned filesystem"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
}
- cp = strchr(parent, '@');
- *cp = '\0';
- /* Walk the snapshots we will be moving */
- pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
- if (pzhp == NULL)
- return (-1);
- pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
- zfs_close(pzhp);
- pd.cb_target = zhp->zfs_name;
- pd.cb_errbuf = errbuf;
- pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
- if (pzhp == NULL)
- return (-1);
- (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
- sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
- ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
- if (ret != 0) {
- zfs_close(pzhp);
- return (-1);
- }
-
- /* issue the ioctl */
(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
sizeof (zc.zc_value));
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -3167,62 +3208,18 @@ zfs_promote(zfs_handle_t *zhp)
if (ret != 0) {
int save_errno = errno;
- (void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
- zfs_close(pzhp);
-
switch (save_errno) {
case EEXIST:
- /*
- * There is a conflicting snapshot name. We
- * should have caught this above, but they could
- * have renamed something in the mean time.
- */
+ /* There is a conflicting snapshot name. */
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "conflicting snapshot name from parent '%s'"),
- parent);
+ "conflicting snapshot '%s' from parent '%s'"),
+ zc.zc_string, parent);
return (zfs_error(hdl, EZFS_EXISTS, errbuf));
default:
return (zfs_standard_error(hdl, save_errno, errbuf));
}
- } else {
- (void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
- }
-
- zfs_close(pzhp);
- return (ret);
-}
-
-struct createdata {
- const char *cd_snapname;
- int cd_ifexists;
-};
-
-static int
-zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
-{
- struct createdata *cd = arg;
- int ret;
-
- if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
- char name[MAXPATHLEN];
-
- (void) strlcpy(name, zhp->zfs_name, sizeof (name));
- (void) strlcat(name, "@", sizeof (name));
- (void) strlcat(name, cd->cd_snapname, sizeof (name));
- (void) zvol_create_link_common(zhp->zfs_hdl, name,
- cd->cd_ifexists);
- /*
- * NB: this is simply a best-effort. We don't want to
- * return an error, because then we wouldn't visit all
- * the volumes.
- */
}
-
- ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
-
- zfs_close(zhp);
-
return (ret);
}
@@ -3286,31 +3283,11 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
- if (ret != 0)
+ if (ret != 0) {
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-
- if (ret == 0 && recursive) {
- struct createdata cd;
-
- cd.cd_snapname = delim + 1;
- cd.cd_ifexists = B_FALSE;
- (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
- }
- if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
- ret = zvol_create_link(zhp->zfs_hdl, path);
- if (ret != 0) {
- (void) zfs_standard_error(hdl, errno,
- dgettext(TEXT_DOMAIN,
- "Volume successfully snapshotted, but device links "
- "were not created"));
- zfs_close(zhp);
- return (-1);
- }
- }
-
- if (ret != 0)
(void) zfs_standard_error(hdl, errno, errbuf);
+ }
zfs_close(zhp);
@@ -3350,7 +3327,7 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
logstr = zhp->zfs_hdl->libzfs_log_str;
zhp->zfs_hdl->libzfs_log_str = NULL;
- cbp->cb_error |= zfs_destroy(zhp);
+ cbp->cb_error |= zfs_destroy(zhp, B_FALSE);
zhp->zfs_hdl->libzfs_log_str = logstr;
}
} else {
@@ -3364,7 +3341,7 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
- if (zfs_destroy(zhp) != 0)
+ if (zfs_destroy(zhp, B_FALSE) != 0)
cbp->cb_error = B_TRUE;
else
changelist_remove(clp, zhp->zfs_name);
@@ -3413,8 +3390,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
*/
if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
- if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
- return (-1);
if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
return (-1);
old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
@@ -3452,10 +3427,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
*/
if ((zhp->zfs_type == ZFS_TYPE_VOLUME) &&
(zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) {
- if (err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name)) {
- zfs_close(zhp);
- return (err);
- }
if (restore_resv) {
new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
if (old_volsize != new_volsize)
@@ -3570,14 +3541,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
- uint64_t unused;
/* validate parents */
- if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
+ if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0)
return (-1);
- (void) parent_name(target, parent, sizeof (parent));
-
/* make sure we're in the same pool */
verify((delim = strchr(target, '/')) != NULL);
if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
@@ -3588,10 +3556,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
}
/* new name cannot be a child of the current dataset name */
- if (strncmp(parent, zhp->zfs_name,
- strlen(zhp->zfs_name)) == 0) {
+ if (is_descendant(zhp->zfs_name, target)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "New dataset name cannot be a descendent of "
+ "New dataset name cannot be a descendant of "
"current dataset name"));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
@@ -3608,7 +3575,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
}
if (recursive) {
- struct destroydata dd;
parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
if (parentname == NULL) {
@@ -3623,15 +3589,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
goto error;
}
- dd.snapname = delim + 1;
- dd.gotone = B_FALSE;
- dd.closezhp = B_TRUE;
-
- /* We remove any zvol links prior to renaming them */
- ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
- if (ret) {
- goto error;
- }
} else {
if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
return (-1);
@@ -3679,27 +3636,10 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
* On failure, we still want to remount any filesystems that
* were previously mounted, so we don't alter the system state.
*/
- if (recursive) {
- struct createdata cd;
-
- /* only create links for datasets that had existed */
- cd.cd_snapname = delim + 1;
- cd.cd_ifexists = B_TRUE;
- (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
- &cd);
- } else {
+ if (!recursive)
(void) changelist_postfix(cl);
- }
} else {
- if (recursive) {
- struct createdata cd;
-
- /* only create links for datasets that had existed */
- cd.cd_snapname = strchr(target, '@') + 1;
- cd.cd_ifexists = B_TRUE;
- ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
- &cd);
- } else {
+ if (!recursive) {
changelist_rename(cl, zfs_get_name(zhp), target);
ret = changelist_postfix(cl);
}
@@ -3718,147 +3658,19 @@ error:
return (ret);
}
-/*
- * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
- * poke devfsadm to create the /dev link, and then wait for the link to appear.
- */
-int
-zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
-{
- return (zvol_create_link_common(hdl, dataset, B_FALSE));
-}
-
-static int
-zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
-{
- zfs_cmd_t zc = { 0 };
-#if 0
- di_devlink_handle_t dhdl;
- priv_set_t *priv_effective;
- int privileged;
-#endif
-
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
- /*
- * Issue the appropriate ioctl.
- */
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
- switch (errno) {
- case EEXIST:
- /*
- * Silently ignore the case where the link already
- * exists. This allows 'zfs volinit' to be run multiple
- * times without errors.
- */
- return (0);
-
- case ENOENT:
- /*
- * Dataset does not exist in the kernel. If we
- * don't care (see zfs_rename), then ignore the
- * error quietly.
- */
- if (ifexists) {
- return (0);
- }
-
- /* FALLTHROUGH */
-
- default:
- return (zfs_standard_error_fmt(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot create device links "
- "for '%s'"), dataset));
- }
- }
-
-#if 0
- /*
- * If privileged call devfsadm and wait for the links to
- * magically appear.
- * Otherwise, print out an informational message.
- */
-
- priv_effective = priv_allocset();
- (void) getppriv(PRIV_EFFECTIVE, priv_effective);
- privileged = (priv_isfullset(priv_effective) == B_TRUE);
- priv_freeset(priv_effective);
-
- if (privileged) {
- if ((dhdl = di_devlink_init(ZFS_DRIVER,
- DI_MAKE_LINK)) == NULL) {
- zfs_error_aux(hdl, strerror(errno));
- (void) zfs_error_fmt(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot create device links "
- "for '%s'"), dataset);
- (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
- return (-1);
- } else {
- (void) di_devlink_fini(&dhdl);
- }
- } else {
- char pathname[MAXPATHLEN];
- struct stat64 statbuf;
- int i;
-
-#define MAX_WAIT 10
-
- /*
- * This is the poor mans way of waiting for the link
- * to show up. If after 10 seconds we still don't
- * have it, then print out a message.
- */
- (void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
- dataset);
-
- for (i = 0; i != MAX_WAIT; i++) {
- if (stat64(pathname, &statbuf) == 0)
- break;
- (void) sleep(1);
- }
- if (i == MAX_WAIT)
- (void) printf(gettext("%s may not be immediately "
- "available\n"), pathname);
- }
-#endif
-
- return (0);
-}
-
-/*
- * Remove a minor node for the given zvol and the associated /dev links.
- */
-int
-zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+nvlist_t *
+zfs_get_user_props(zfs_handle_t *zhp)
{
- zfs_cmd_t zc = { 0 };
-
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
- if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
- switch (errno) {
- case ENXIO:
- /*
- * Silently ignore the case where the link no longer
- * exists, so that 'zfs volfini' can be run multiple
- * times without errors.
- */
- return (0);
-
- default:
- return (zfs_standard_error_fmt(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot remove device "
- "links for '%s'"), dataset));
- }
- }
-
- return (0);
+ return (zhp->zfs_user_props);
}
nvlist_t *
-zfs_get_user_props(zfs_handle_t *zhp)
+zfs_get_recvd_props(zfs_handle_t *zhp)
{
- return (zhp->zfs_user_props);
+ if (zhp->zfs_recvd_props == NULL)
+ if (get_recvd_props_ioctl(zhp) != 0)
+ return (NULL);
+ return (zhp->zfs_recvd_props);
}
/*
@@ -3870,10 +3682,12 @@ zfs_get_user_props(zfs_handle_t *zhp)
* for new unique user properties and add them to the list.
*
* - For non fixed-width properties, keep track of the maximum width seen
- * so that we can size the column appropriately.
+ * so that we can size the column appropriately. If the user has
+ * requested received property values, we also need to compute the width
+ * of the RECEIVED column.
*/
int
-zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
+zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received)
{
libzfs_handle_t *hdl = zhp->zfs_hdl;
zprop_list_t *entry;
@@ -3944,66 +3758,30 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
if (strlen(buf) > entry->pl_width)
entry->pl_width = strlen(buf);
}
- } else if (nvlist_lookup_nvlist(userprops,
- entry->pl_user_prop, &propval) == 0) {
- verify(nvlist_lookup_string(propval,
- ZPROP_VALUE, &strval) == 0);
- if (strlen(strval) > entry->pl_width)
- entry->pl_width = strlen(strval);
+ if (received && zfs_prop_get_recvd(zhp,
+ zfs_prop_to_name(entry->pl_prop),
+ buf, sizeof (buf), B_FALSE) == 0)
+ if (strlen(buf) > entry->pl_recvd_width)
+ entry->pl_recvd_width = strlen(buf);
+ } else {
+ if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop,
+ &propval) == 0) {
+ verify(nvlist_lookup_string(propval,
+ ZPROP_VALUE, &strval) == 0);
+ if (strlen(strval) > entry->pl_width)
+ entry->pl_width = strlen(strval);
+ }
+ if (received && zfs_prop_get_recvd(zhp,
+ entry->pl_user_prop,
+ buf, sizeof (buf), B_FALSE) == 0)
+ if (strlen(buf) > entry->pl_recvd_width)
+ entry->pl_recvd_width = strlen(buf);
}
}
return (0);
}
-#ifdef TODO
-int
-zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
-{
- zfs_cmd_t zc = { 0 };
- nvlist_t *nvp;
- gid_t gid;
- uid_t uid;
- const gid_t *groups;
- int group_cnt;
- int error;
-
- if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
- return (no_memory(hdl));
-
- uid = ucred_geteuid(cred);
- gid = ucred_getegid(cred);
- group_cnt = ucred_getgroups(cred, &groups);
-
- if (uid == (uid_t)-1 || gid == (uid_t)-1 || group_cnt == (uid_t)-1)
- return (1);
-
- if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
- nvlist_free(nvp);
- return (1);
- }
-
- if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
- nvlist_free(nvp);
- return (1);
- }
-
- if (nvlist_add_uint32_array(nvp,
- ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
- nvlist_free(nvp);
- return (1);
- }
- (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-
- if (zcmd_write_src_nvlist(hdl, &zc, nvp))
- return (-1);
-
- error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
- nvlist_free(nvp);
- return (error);
-}
-#endif
-
int
zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
char *resource, void *export, void *sharetab,
@@ -4042,9 +3820,11 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr);
/*
- * We leave user:props in the nvlist, so there will be
- * some ZPROP_INVAL. To be extra safe, don't prune
- * those.
+ * User properties will result in ZPROP_INVAL, and since we
+ * only know how to prune standard ZFS properties, we always
+ * leave these in the list. This can also happen if we
+ * encounter an unknown DSL property (when running older
+ * software, for example).
*/
if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
(void) nvlist_remove(zhp->zfs_props,
@@ -4173,6 +3953,331 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
return (error);
}
+int
+zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
+ boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
+ int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
+{
+ zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+ ASSERT(!recursive || dsobj == 0);
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+ if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
+ >= sizeof (zc.zc_string))
+ return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
+ zc.zc_cookie = recursive;
+ zc.zc_temphold = temphold;
+ zc.zc_cleanup_fd = cleanup_fd;
+ zc.zc_sendobj = dsobj;
+ zc.zc_createtxg = createtxg;
+
+ if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
+ char errbuf[ZFS_MAXNAMELEN+32];
+
+ /*
+ * if it was recursive, the one that actually failed will be in
+ * zc.zc_name.
+ */
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot hold '%s@%s'"), zc.zc_name, snapname);
+ switch (errno) {
+ case E2BIG:
+ /*
+ * Temporary tags wind up having the ds object id
+ * prepended. So even if we passed the length check
+ * above, it's still possible for the tag to wind
+ * up being slightly too long.
+ */
+ return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+ case EINVAL:
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ case EEXIST:
+ return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
+ case ENOENT:
+ if (enoent_ok)
+ return (ENOENT);
+ /* FALLTHROUGH */
+ default:
+ return (zfs_standard_error_fmt(hdl, errno, errbuf));
+ }
+ }
+
+ return (0);
+}
+
+int
+zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
+ boolean_t recursive)
+{
+ zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+ (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+ if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
+ >= sizeof (zc.zc_string))
+ return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
+ zc.zc_cookie = recursive;
+
+ if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
+ char errbuf[ZFS_MAXNAMELEN+32];
+
+ /*
+ * if it was recursive, the one that actually failed will be in
+ * zc.zc_name.
+ */
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
+ snapname);
+ switch (errno) {
+ case ESRCH:
+ return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+ case EINVAL:
+ return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+ default:
+ return (zfs_standard_error_fmt(hdl, errno, errbuf));
+ }
+ }
+
+ return (0);
+}
+
+int
+zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
+{
+ zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ int nvsz = 2048;
+ void *nvbuf;
+ int err = 0;
+ char errbuf[ZFS_MAXNAMELEN+32];
+
+ assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
+ zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+tryagain:
+
+ nvbuf = malloc(nvsz);
+ if (nvbuf == NULL) {
+ err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
+ goto out;
+ }
+
+ zc.zc_nvlist_dst_size = nvsz;
+ zc.zc_nvlist_dst = (uintptr_t)nvbuf;
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
+
+ if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"),
+ zc.zc_name);
+ switch (errno) {
+ case ENOMEM:
+ free(nvbuf);
+ nvsz = zc.zc_nvlist_dst_size;
+ goto tryagain;
+
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EINVAL:
+ err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
+ case ENOENT:
+ err = zfs_error(hdl, EZFS_NOENT, errbuf);
+ break;
+ default:
+ err = zfs_standard_error_fmt(hdl, errno, errbuf);
+ break;
+ }
+ } else {
+ /* success */
+ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
+ if (rc) {
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(
+ TEXT_DOMAIN, "cannot get permissions on '%s'"),
+ zc.zc_name);
+ err = zfs_standard_error_fmt(hdl, rc, errbuf);
+ }
+ }
+
+ free(nvbuf);
+out:
+ return (err);
+}
+
+int
+zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
+{
+ zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ char *nvbuf;
+ char errbuf[ZFS_MAXNAMELEN+32];
+ size_t nvsz;
+ int err;
+
+ assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
+ zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+ err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE);
+ assert(err == 0);
+
+ nvbuf = malloc(nvsz);
+
+ err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0);
+ assert(err == 0);
+
+ zc.zc_nvlist_src_size = nvsz;
+ zc.zc_nvlist_src = (uintptr_t)nvbuf;
+ zc.zc_perm_action = un;
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+ if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"),
+ zc.zc_name);
+ switch (errno) {
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EINVAL:
+ err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
+ case ENOENT:
+ err = zfs_error(hdl, EZFS_NOENT, errbuf);
+ break;
+ default:
+ err = zfs_standard_error_fmt(hdl, errno, errbuf);
+ break;
+ }
+ }
+
+ free(nvbuf);
+
+ return (err);
+}
+
+int
+zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
+{
+ zfs_cmd_t zc = { 0 };
+ libzfs_handle_t *hdl = zhp->zfs_hdl;
+ int nvsz = 2048;
+ void *nvbuf;
+ int err = 0;
+ char errbuf[ZFS_MAXNAMELEN+32];
+
+ assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+tryagain:
+
+ nvbuf = malloc(nvsz);
+ if (nvbuf == NULL) {
+ err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
+ goto out;
+ }
+
+ zc.zc_nvlist_dst_size = nvsz;
+ zc.zc_nvlist_dst = (uintptr_t)nvbuf;
+
+ (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
+
+ if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
+ zc.zc_name);
+ switch (errno) {
+ case ENOMEM:
+ free(nvbuf);
+ nvsz = zc.zc_nvlist_dst_size;
+ goto tryagain;
+
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EINVAL:
+ err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
+ case ENOENT:
+ err = zfs_error(hdl, EZFS_NOENT, errbuf);
+ break;
+ default:
+ err = zfs_standard_error_fmt(hdl, errno, errbuf);
+ break;
+ }
+ } else {
+ /* success */
+ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
+ if (rc) {
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
+ zc.zc_name);
+ err = zfs_standard_error_fmt(hdl, rc, errbuf);
+ }
+ }
+
+ free(nvbuf);
+out:
+ return (err);
+}
+
+uint64_t
+zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props)
+{
+ uint64_t numdb;
+ uint64_t nblocks, volblocksize;
+ int ncopies;
+ char *strval;
+
+ if (nvlist_lookup_string(props,
+ zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0)
+ ncopies = atoi(strval);
+ else
+ ncopies = 1;
+ if (nvlist_lookup_uint64(props,
+ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+ &volblocksize) != 0)
+ volblocksize = ZVOL_DEFAULT_BLOCKSIZE;
+ nblocks = volsize/volblocksize;
+ /* start with metadnode L0-L6 */
+ numdb = 7;
+ /* calculate number of indirects */
+ while (nblocks > 1) {
+ nblocks += DNODES_PER_LEVEL - 1;
+ nblocks /= DNODES_PER_LEVEL;
+ numdb += nblocks;
+ }
+ numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1);
+ volsize *= ncopies;
+ /*
+ * each of these metadata blocks is charged at exactly
+ * 2^DN_MAX_INDBLKSHIFT bytes, its size when metadata isn't compressed,
+ * but in practice such blocks compress down to about 1100 bytes
+ */
+ numdb *= 1ULL << DN_MAX_INDBLKSHIFT;
+ volsize += numdb;
+ return (volsize);
+}
+
/*
* Attach/detach the given filesystem to/from the given jail.
*/
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c
new file mode 100644
index 0000000..ae84285
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_diff.c
@@ -0,0 +1,832 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * zfs diff support
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/zfs_ioctl.h>
+#include <libzfs.h>
+#include "libzfs_impl.h"
+
+#define ZDIFF_SNAPDIR "/.zfs/snapshot/" /* appended to a mountpoint to reach a snapshot */
+#define ZDIFF_SHARESDIR "/.zfs/shares/" /* directory whose inode number is skipped in diffs */
+#define ZDIFF_PREFIX "zfs-diff-%d" /* name of the just-in-time snapshot; %d is getpid() */
+
+#define ZDIFF_ADDED '+' /* change-type column: object only in the "to" snapshot */
+#define ZDIFF_MODIFIED 'M' /* change-type column: object changed (incl. link count) */
+#define ZDIFF_REMOVED '-' /* change-type column: object only in the "from" snapshot */
+#define ZDIFF_RENAMED 'R' /* change-type column: same object, different name */
+
+/*
+ * Return B_TRUE when the final path components of fpath and tpath are
+ * identical.  A path that contains no '/' is treated as being its own
+ * final component (strrchr() returns NULL in that case, which must not be
+ * offset and dereferenced).
+ */
+static boolean_t
+do_name_cmp(const char *fpath, const char *tpath)
+{
+	const char *fname = strrchr(fpath, '/');
+	const char *tname = strrchr(tpath, '/');
+
+	fname = (fname != NULL) ? fname + 1 : fpath;
+	tname = (tname != NULL) ? tname + 1 : tpath;
+	return (strcmp(fname, tname) == 0);
+}
+
+typedef struct differ_info {
+ zfs_handle_t *zhp; /* handle given to zfs_show_diffs() */
+ char *fromsnap; /* full name of the older snapshot (allocated) */
+ char *frommnt; /* <mountpoint>/.zfs/snapshot/<from snap name> */
+ char *tosnap; /* full name of the newer snapshot (allocated) */
+ char *tomnt; /* <mountpoint>/.zfs/snapshot/<to snap name> */
+ char *ds; /* dataset name, i.e. the part before '@' */
+ char *dsmnt; /* mountpoint of ds; "" when mounted on "/" */
+ char *tmpsnap; /* just-in-time snapshot name, if one was made */
+ char errbuf[1024]; /* message describing the last failure */
+ boolean_t isclone; /* tosnap's dataset descends from a clone of fromsnap */
+ boolean_t scripted; /* ZFS_DIFF_PARSEABLE: tab between rename paths */
+ boolean_t classify; /* ZFS_DIFF_CLASSIFY: print a file-type column */
+ boolean_t timestamped; /* ZFS_DIFF_TIMESTAMP: print a ctime column */
+ uint64_t shares; /* inode number of .zfs/shares; never reported */
+ int zerr; /* errno-style status of the most recent step */
+ int cleanupfd; /* ZFS_DEV descriptor passed as zc_cleanup_fd */
+ int outputfd; /* descriptor the differ thread writes results to */
+ int datafd; /* read end of the pipe fed by ZFS_IOC_DIFF */
+} differ_info_t;
+
+/*
+ * Given a {dsname, object id}, get the object path and stats using
+ * ZFS_IOC_OBJ_TO_STATS.
+ *
+ * On success the path is copied into pn (at most maxlen bytes) and 0 is
+ * returned.  On failure -1 is returned, di->zerr holds the ioctl's errno
+ * and di->errbuf a message.  *sb is filled in from the ioctl buffer in
+ * both cases.
+ */
+static int
+get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj,
+    char *pn, int maxlen, zfs_stat_t *sb)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+
+	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
+	zc.zc_obj = obj;
+
+	errno = 0;
+	error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc);
+	di->zerr = errno;
+
+	/* we can get stats even if we failed to get a path */
+	(void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t));
+	if (error == 0) {
+		ASSERT(di->zerr == 0);
+		(void) strlcpy(pn, zc.zc_value, maxlen);
+		return (0);
+	}
+
+	if (di->zerr == EPERM) {
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "The sys_config privilege or diff delegated permission "
+		    "is needed\nto discover path names"));
+	} else {
+		/* cast so the argument matches %lld on every platform */
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "Unable to determine path or stats for "
+		    "object %lld in %s"), (longlong_t)obj, dsname);
+	}
+	return (-1);
+}
+
+/*
+ * stream_bytes
+ *
+ * Prints a file name out a character at a time.  If the character is
+ * not in the range of what we consider "printable" ASCII, display it
+ * as an escaped 3-digit octal value.  Everything up to and including a
+ * space, the backslash itself, the DELete character (the last 7-bit
+ * ASCII character) and all 8-bit values are escaped.
+ *
+ * Each byte is examined as an unsigned char: a plain char may be signed,
+ * and a negative value would be sign-extended and printed as far more
+ * than three octal digits.
+ */
+static void
+stream_bytes(FILE *fp, const char *string)
+{
+	unsigned char c;
+
+	while ((c = (unsigned char)*string++) != '\0') {
+		if (c > ' ' && c != '\\' && c < 0177)
+			(void) fputc(c, fp);
+		else
+			(void) fprintf(fp, "\\%03o", (unsigned int)c);
+	}
+}
+
+/*
+ * Write the one-character file-type tag for the S_IFMT bits of 'what';
+ * '?' is written for a type that is not recognized.
+ */
+static void
+print_what(FILE *fp, mode_t what)
+{
+	mode_t fmt = what & S_IFMT;
+	char tag = '?';
+
+	switch (fmt) {
+	case S_IFREG:
+		tag = 'F';
+		break;
+	case S_IFDIR:
+		tag = '/';
+		break;
+	case S_IFLNK:
+		tag = '@';
+		break;
+	case S_IFIFO:
+		tag = '|';
+		break;
+	case S_IFSOCK:
+		tag = '=';
+		break;
+	case S_IFCHR:
+		tag = 'C';
+		break;
+	case S_IFBLK:
+		tag = 'B';
+		break;
+#ifdef S_IFDOOR
+	case S_IFDOOR:
+		tag = '>';
+		break;
+#endif
+#ifdef S_IFPORT
+	case S_IFPORT:
+		tag = 'P';
+		break;
+#endif
+	default:
+		break;
+	}
+	(void) fputc(tag, fp);
+}
+
+/*
+ * Print the dataset mountpoint followed by the object's path within the
+ * dataset, escaping both with stream_bytes().
+ */
+static void
+print_cmn(FILE *fp, differ_info_t *di, const char *file)
+{
+	stream_bytes(fp, di->dsmnt);
+	stream_bytes(fp, file);
+}
+
+/*
+ * Print the columns that start every record: the ctime (only when
+ * di->timestamped), the change-type character, and the file-type tag
+ * (only when di->classify).  Each column is followed by a tab.
+ */
+static void
+print_prefix(FILE *fp, differ_info_t *di, char type, zfs_stat_t *isb)
+{
+	if (di->timestamped)
+		(void) fprintf(fp, "%10lld.%09lld\t",
+		    (longlong_t)isb->zs_ctime[0],
+		    (longlong_t)isb->zs_ctime[1]);
+	(void) fprintf(fp, "%c\t", type);
+	if (di->classify) {
+		print_what(fp, isb->zs_mode);
+		(void) fprintf(fp, "\t");
+	}
+}
+
+/*
+ * Print a rename record: old path, a separator (a tab when di->scripted,
+ * " -> " otherwise), then the new path.
+ */
+static void
+print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new,
+    zfs_stat_t *isb)
+{
+	print_prefix(fp, di, ZDIFF_RENAMED, isb);
+	print_cmn(fp, di, old);
+	if (di->scripted)
+		(void) fprintf(fp, "\t");
+	else
+		(void) fprintf(fp, " -> ");
+	print_cmn(fp, di, new);
+	(void) fprintf(fp, "\n");
+}
+
+/*
+ * Print a modification record for a file whose link count changed by
+ * 'delta'; the signed delta follows the path in parentheses.
+ */
+static void
+print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file,
+    zfs_stat_t *isb)
+{
+	print_prefix(fp, di, ZDIFF_MODIFIED, isb);
+	print_cmn(fp, di, file);
+	(void) fprintf(fp, "\t(%+d)", delta);
+	(void) fprintf(fp, "\n");
+}
+
+/*
+ * Print a single-path record whose change-type column is 'type'.
+ */
+static void
+print_file(FILE *fp, differ_info_t *di, char type, const char *file,
+    zfs_stat_t *isb)
+{
+	print_prefix(fp, di, type, isb);
+	print_cmn(fp, di, file);
+	(void) fprintf(fp, "\n");
+}
+
+/*
+ * Report how object 'dobj' differs between di->fromsnap and di->tosnap.
+ *
+ * The object is looked up in both snapshots and, depending on where it
+ * was found and how its generation, ctime, name and link count compare,
+ * an added, removed, modified, renamed or link-change record is written
+ * to fp (or nothing at all).  Returns 0 on success and -1 when a lookup
+ * fails for a reason other than ENOENT/ENOTSUP; in that case di->zerr and
+ * di->errbuf are left as set by get_stats_for_obj().
+ */
+static int
+write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj)
+{
+ struct zfs_stat fsb, tsb; /* stats from the "from" / "to" snapshot */
+ boolean_t same_name;
+ mode_t fmode, tmode;
+ char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN];
+ int fobjerr, tobjerr;
+ int change; /* link count delta, to minus from */
+
+ if (dobj == di->shares) /* the shares directory is never reported */
+ return (0);
+
+ /*
+ * Check the from and to snapshots for info on the object. If
+ * we get ENOENT, then the object just didn't exist in that
+ * snapshot. If we get ENOTSUP, then we tried to get
+ * info on a non-ZPL object, which we don't care about anyway.
+ */
+ fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname,
+ MAXPATHLEN, &fsb);
+ if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP)
+ return (-1);
+
+ tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname,
+ MAXPATHLEN, &tsb);
+ if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP)
+ return (-1);
+
+ /*
+ * Unallocated object sharing the same meta dnode block
+ */
+ if (fobjerr && tobjerr) {
+ ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP);
+ di->zerr = 0;
+ return (0);
+ }
+
+ di->zerr = 0; /* negate get_stats_for_obj() from side that failed */
+ fmode = fsb.zs_mode & S_IFMT;
+ tmode = tsb.zs_mode & S_IFMT;
+ /* link deltas are not reported for directories or when a side has no links */
+ if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 ||
+ tsb.zs_links == 0)
+ change = 0;
+ else
+ change = tsb.zs_links - fsb.zs_links;
+
+ if (fobjerr) { /* found only in the "to" snapshot */
+ if (change) {
+ print_link_change(fp, di, change, tobjname, &tsb);
+ return (0);
+ }
+ print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb);
+ return (0);
+ } else if (tobjerr) { /* found only in the "from" snapshot */
+ if (change) {
+ print_link_change(fp, di, change, fobjname, &fsb);
+ return (0);
+ }
+ print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb);
+ return (0);
+ }
+
+ /* present in both: a changed file type is treated as a new object */
+ if (fmode != tmode && fsb.zs_gen == tsb.zs_gen)
+ tsb.zs_gen++; /* Force a generational difference */
+ same_name = do_name_cmp(fobjname, tobjname);
+
+ /* Simple modification or no change */
+ if (fsb.zs_gen == tsb.zs_gen) {
+ /* No apparent changes. Could we assert !this? */
+ if (fsb.zs_ctime[0] == tsb.zs_ctime[0] &&
+ fsb.zs_ctime[1] == tsb.zs_ctime[1])
+ return (0);
+ if (change) {
+ /* links gained: show the "from" path; links lost: the "to" path */
+ print_link_change(fp, di, change,
+ change > 0 ? fobjname : tobjname, &tsb);
+ } else if (same_name) {
+ print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb);
+ } else {
+ print_rename(fp, di, fobjname, tobjname, &tsb);
+ }
+ return (0);
+ } else {
+ /* file re-created or object re-used */
+ print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb);
+ print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb);
+ return (0);
+ }
+}
+
+/*
+ * Report every object in the inclusive range [dr->ddr_first, dr->ddr_last],
+ * stopping at, and returning, the first non-zero result from
+ * write_inuse_diffs_one().
+ */
+static int
+write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr)
+{
+	uint64_t obj = dr->ddr_first;
+	int rc = 0;
+
+	while (rc == 0 && obj <= dr->ddr_last)
+		rc = write_inuse_diffs_one(fp, di, obj++);
+	return (rc);
+}
+
+/*
+ * Report 'object' as removed, using its path in the "from" snapshot.
+ * A lookup that fails with ENOENT while the object has no links is
+ * tolerated (the object was in the delete queue) and nothing is printed.
+ * Returns 0 on success and -1 for any other lookup failure.
+ */
+static int
+describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf,
+    int maxlen)
+{
+	struct zfs_stat st;
+	int failed;
+
+	failed = get_stats_for_obj(di, di->fromsnap, object, namebuf,
+	    maxlen, &st);
+	if (!failed) {
+		print_file(fp, di, ZDIFF_REMOVED, namebuf, &st);
+		return (0);
+	}
+
+	if (di->zerr != ENOENT || st.zs_links != 0)
+		return (-1);
+
+	/* Let it slide, it was in the delete queue on the from side */
+	di->zerr = 0;
+	return (0);
+}
+
+/*
+ * Report as removed every object that is allocated in the "from" snapshot
+ * within the range [dr->ddr_first, dr->ddr_last].  Allocated objects are
+ * found by repeatedly issuing ZFS_IOC_NEXT_OBJ; ESRCH from that ioctl ends
+ * the scan.  Returns 0 on success and -1 when di->zerr has been set.
+ */
+static int
+write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *lhdl = di->zhp->zfs_hdl;
+	char fobjname[MAXPATHLEN];
+
+	(void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name));
+	zc.zc_obj = dr->ddr_first - 1;
+
+	ASSERT(di->zerr == 0);
+
+	while (zc.zc_obj < dr->ddr_last) {
+		int err;
+
+		err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc);
+		if (err == 0) {
+			if (zc.zc_obj == di->shares) {
+				zc.zc_obj++;
+				continue;
+			}
+			if (zc.zc_obj > dr->ddr_last) {
+				break;
+			}
+			err = describe_free(fp, di, zc.zc_obj, fobjname,
+			    MAXPATHLEN);
+			if (err)
+				break;
+		} else if (errno == ESRCH) {
+			break;
+		} else {
+			/*
+			 * Capture errno first: building the message calls
+			 * into libc and may change it.
+			 */
+			int saved_errno = errno;
+
+			(void) snprintf(di->errbuf, sizeof (di->errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "next allocated object (> %lld) find failure"),
+			    (longlong_t)zc.zc_obj);
+			di->zerr = saved_errno;
+			break;
+		}
+	}
+	if (di->zerr)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Thread entry point (started by zfs_show_diffs()).  Reads
+ * dmu_diff_record_t records from di->datafd until end of file and writes
+ * the corresponding diff lines to di->outputfd.  Both descriptors are
+ * closed before returning.
+ *
+ * Returns (void *)0 on success and (void *)-1 on failure, with di->zerr
+ * and di->errbuf describing the problem.  A short record or an unknown
+ * record type is reported as EPIPE.
+ */
+static void *
+differ(void *arg)
+{
+	differ_info_t *di = arg;
+	dmu_diff_record_t dr;
+	FILE *ofp;
+	int err = 0;
+
+	if ((ofp = fdopen(di->outputfd, "w")) == NULL) {
+		di->zerr = errno;
+		(void) strerror_r(errno, di->errbuf, sizeof (di->errbuf));
+		(void) close(di->datafd);
+		return ((void *)-1);
+	}
+
+	for (;;) {
+		char *cp = (char *)&dr;
+		int len = sizeof (dr);
+		int rv;
+
+		/* keep reading until one whole record has arrived */
+		do {
+			rv = read(di->datafd, cp, len);
+			if (rv > 0) {
+				/* only advance on bytes actually read */
+				cp += rv;
+				len -= rv;
+			}
+		} while (len > 0 && rv > 0);
+
+		if (rv < 0 || (rv == 0 && len != sizeof (dr))) {
+			di->zerr = EPIPE;
+			break;
+		} else if (rv == 0) {
+			/* end of file at a natural breaking point */
+			break;
+		}
+
+		switch (dr.ddr_type) {
+		case DDR_FREE:
+			err = write_free_diffs(ofp, di, &dr);
+			break;
+		case DDR_INUSE:
+			err = write_inuse_diffs(ofp, di, &dr);
+			break;
+		default:
+			di->zerr = EPIPE;
+			break;
+		}
+
+		if (err || di->zerr)
+			break;
+	}
+
+	(void) fclose(ofp);
+	(void) close(di->datafd);
+	if (err)
+		return ((void *)-1);
+	if (di->zerr) {
+		/* with err == 0, zerr can only be the EPIPE set above */
+		ASSERT(di->zerr == EPIPE);
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "Internal error: bad data from diff IOCTL"));
+		return ((void *)-1);
+	}
+	return ((void *)0);
+}
+
+/*
+ * Record the inode number of <dsmnt>/.zfs/shares/ in di->shares so that
+ * the object can be skipped while diffing.  When the directory cannot be
+ * stat'ed this is an error only if 'sun' is defined; otherwise di->shares
+ * is left untouched and 0 is returned.
+ */
+static int
+find_shares_object(differ_info_t *di)
+{
+	char sharespath[MAXPATHLEN];
+	struct stat64 st = { 0 };
+
+	(void) strlcpy(sharespath, di->dsmnt, MAXPATHLEN);
+	(void) strlcat(sharespath, ZDIFF_SHARESDIR, MAXPATHLEN);
+
+	if (stat64(sharespath, &st) == 0) {
+		di->shares = (uint64_t)st.st_ino;
+		return (0);
+	}
+
+#ifdef sun
+	(void) snprintf(di->errbuf, sizeof (di->errbuf),
+	    dgettext(TEXT_DOMAIN, "Cannot stat %s"), sharespath);
+	return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf));
+#else
+	return (0);
+#endif
+}
+
+/*
+ * Create a just-in-time snapshot of di->ds named after ZDIFF_PREFIX and
+ * the process id, tied to di->cleanupfd.  On success di->tmpsnap and
+ * di->tosnap are set and 0 is returned; on failure di->errbuf is filled
+ * in and the result of zfs_error()/zfs_standard_error() is returned.
+ */
+static int
+make_temp_snapshot(differ_info_t *di)
+{
+	libzfs_handle_t *hdl = di->zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	int err;
+
+	(void) snprintf(zc.zc_value, sizeof (zc.zc_value),
+	    ZDIFF_PREFIX, getpid());
+	(void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name));
+	zc.zc_cleanup_fd = di->cleanupfd;
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) == 0) {
+		di->tmpsnap = zfs_strdup(hdl, zc.zc_value);
+		di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap);
+		return (0);
+	}
+
+	err = errno;
+	if (err != EPERM) {
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN, "Cannot create just-in-time "
+		    "snapshot of '%s'"), zc.zc_name);
+		return (zfs_standard_error(hdl, err, di->errbuf));
+	}
+
+	(void) snprintf(di->errbuf, sizeof (di->errbuf),
+	    dgettext(TEXT_DOMAIN, "The diff delegated "
+	    "permission is needed in order\nto create a "
+	    "just-in-time snapshot for diffing\n"));
+	return (zfs_error(hdl, EZFS_DIFF, di->errbuf));
+}
+
+/*
+ * Release everything a differ_info_t owns: the allocated name and
+ * mountpoint strings and the cleanup descriptor.  The structure itself
+ * is not freed.
+ */
+static void
+teardown_differ_info(differ_info_t *di)
+{
+	/* dataset */
+	free(di->ds);
+	free(di->dsmnt);
+
+	/* "from" side */
+	free(di->fromsnap);
+	free(di->frommnt);
+
+	/* "to" side, including any just-in-time snapshot name */
+	free(di->tosnap);
+	free(di->tomnt);
+	free(di->tmpsnap);
+
+	(void) close(di->cleanupfd);
+}
+
+/*
+ * Work out the dataset and the two snapshots to compare from the
+ * user-supplied names, filling in di->ds, di->fromsnap and di->tosnap
+ * (and di->isclone / di->tmpsnap where applicable).  When no "to" snapshot
+ * is named, a just-in-time snapshot of the dataset is created.
+ *
+ * Returns 0 on success; otherwise the result of zfs_error() or
+ * make_temp_snapshot(), with di->errbuf describing the failure.
+ */
+static int
+get_snapshot_names(differ_info_t *di, const char *fromsnap,
+    const char *tosnap)
+{
+	libzfs_handle_t *hdl = di->zhp->zfs_hdl;
+	char *atptrf = NULL;
+	char *atptrt = NULL;
+	int fdslen, fsnlen;
+	int tdslen, tsnlen;
+
+	/*
+	 * Can accept
+	 *    dataset@snap1
+	 *    dataset@snap1 dataset@snap2
+	 *    dataset@snap1 @snap2
+	 *    dataset@snap1 dataset
+	 *    @snap1 dataset@snap2
+	 */
+	if (tosnap == NULL) {
+		/* only a from snapshot given, must be valid */
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "Badly formed snapshot name %s"), fromsnap);
+
+		if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT,
+		    B_FALSE)) {
+			return (zfs_error(hdl, EZFS_INVALIDNAME,
+			    di->errbuf));
+		}
+
+		atptrf = strchr(fromsnap, '@');
+		ASSERT(atptrf != NULL);
+		fdslen = atptrf - fromsnap;
+
+		di->fromsnap = zfs_strdup(hdl, fromsnap);
+		di->ds = zfs_strdup(hdl, fromsnap);
+		di->ds[fdslen] = '\0';
+
+		/* the to snap will be a just-in-time snap of the head */
+		return (make_temp_snapshot(di));
+	}
+
+	(void) snprintf(di->errbuf, sizeof (di->errbuf),
+	    dgettext(TEXT_DOMAIN,
+	    "Unable to determine which snapshots to compare"));
+
+	atptrf = strchr(fromsnap, '@');
+	atptrt = strchr(tosnap, '@');
+	fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap);
+	tdslen = atptrt ? atptrt - tosnap : strlen(tosnap);
+	fsnlen = strlen(fromsnap) - fdslen;	/* includes @ sign */
+	tsnlen = strlen(tosnap) - tdslen;	/* includes @ sign */
+
+	if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0) ||
+	    (fsnlen == 0 && tsnlen == 0)) {
+		return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf));
+	} else if ((fdslen > 0 && tdslen > 0) &&
+	    ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) {
+		/*
+		 * not the same dataset name, might be okay if
+		 * tosnap is a clone of a fromsnap descendant.
+		 */
+		char origin[ZFS_MAXNAMELEN];
+		zprop_source_t src;
+		zfs_handle_t *zhp;
+
+		di->ds = zfs_alloc(hdl, tdslen + 1);
+		(void) strncpy(di->ds, tosnap, tdslen);
+		di->ds[tdslen] = '\0';
+
+		/* follow the chain of origins looking for fromsnap */
+		zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM);
+		while (zhp != NULL) {
+			if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN,
+			    origin, sizeof (origin), &src, NULL, 0,
+			    B_FALSE) != 0) {
+				/*
+				 * No usable origin: stop here rather than
+				 * compare against, or try to open, a
+				 * buffer that may not have been filled in.
+				 */
+				(void) zfs_close(zhp);
+				zhp = NULL;
+				break;
+			}
+
+			if (strncmp(origin, fromsnap, fsnlen) == 0)
+				break;
+
+			(void) zfs_close(zhp);
+			zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM);
+		}
+
+		if (zhp == NULL) {
+			(void) snprintf(di->errbuf, sizeof (di->errbuf),
+			    dgettext(TEXT_DOMAIN,
+			    "Not an earlier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf));
+		} else {
+			(void) zfs_close(zhp);
+		}
+
+		di->isclone = B_TRUE;
+		di->fromsnap = zfs_strdup(hdl, fromsnap);
+		if (tsnlen) {
+			di->tosnap = zfs_strdup(hdl, tosnap);
+		} else {
+			return (make_temp_snapshot(di));
+		}
+	} else {
+		/* one dataset name serves both sides */
+		int dslen = fdslen ? fdslen : tdslen;
+
+		di->ds = zfs_alloc(hdl, dslen + 1);
+		(void) strncpy(di->ds, fdslen ? fromsnap : tosnap, dslen);
+		di->ds[dslen] = '\0';
+
+		di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf);
+		if (tsnlen) {
+			di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt);
+		} else {
+			return (make_temp_snapshot(di));
+		}
+	}
+	return (0);
+}
+
+/*
+ * Look up where dataset 'dsnm' is mounted and hand the mountpoint string
+ * back through *mntpt.  A mountpoint of exactly "/" is rewritten to the
+ * empty string so that paths built from it do not start with "//".
+ * Returns 0 on success or the result of zfs_error() when the dataset is
+ * not mounted.
+ */
+static int
+get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt)
+{
+	libzfs_handle_t *hdl = di->zhp->zfs_hdl;
+	char *mp;
+
+	if (!is_mounted(hdl, dsnm, mntpt)) {
+		(void) snprintf(di->errbuf, sizeof (di->errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "Cannot diff an unmounted snapshot"));
+		return (zfs_error(hdl, EZFS_BADTYPE, di->errbuf));
+	}
+
+	mp = *mntpt;
+	if (mp[0] == '/' && mp[1] == '\0')
+		mp[0] = '\0';
+	return (0);
+}
+
+/*
+ * Fill in di->dsmnt, di->tomnt and di->frommnt.  The snapshot directories
+ * are <mountpoint>/.zfs/snapshot/<snapshot name>; for a clone the "from"
+ * snapshot lives under the mountpoint of its own dataset rather than
+ * di->ds.  Returns 0 on success and -1 when a mountpoint lookup fails.
+ */
+static int
+get_mountpoints(differ_info_t *di)
+{
+	libzfs_handle_t *hdl = di->zhp->zfs_hdl;
+	char *at;
+	char *originmnt = NULL;
+	int rc;
+
+	/* first get the mountpoint for the parent dataset */
+	if (get_mountpoint(di, di->ds, &di->dsmnt) != 0)
+		return (-1);
+
+	at = strchr(di->tosnap, '@');
+	ASSERT3P(at, !=, NULL);
+	di->tomnt = zfs_asprintf(hdl, "%s%s%s", di->dsmnt,
+	    ZDIFF_SNAPDIR, at + 1);
+
+	at = strchr(di->fromsnap, '@');
+	ASSERT3P(at, !=, NULL);
+
+	if (!di->isclone) {
+		di->frommnt = zfs_asprintf(hdl, "%s%s%s", di->dsmnt,
+		    ZDIFF_SNAPDIR, at + 1);
+		return (0);
+	}
+
+	/* temporarily cut fromsnap at the '@' to name its dataset */
+	*at = '\0';
+	rc = get_mountpoint(di, di->fromsnap, &originmnt);
+	*at = '@';
+	if (rc != 0)
+		return (-1);
+
+	di->frommnt = zfs_asprintf(hdl, "%s%s%s", originmnt,
+	    ZDIFF_SNAPDIR, at + 1);
+	free(originmnt);
+
+	return (0);
+}
+
+/*
+ * Prepare 'di' for a diff run: open the cleanup descriptor on ZFS_DEV,
+ * resolve the snapshot names, find the mountpoints and locate the shares
+ * object, in that order.  Returns 0 on success and -1 as soon as one of
+ * the steps fails; failure to open ZFS_DEV trips VERIFY().
+ */
+static int
+setup_differ_info(zfs_handle_t *zhp, const char *fromsnap,
+    const char *tosnap, differ_info_t *di)
+{
+	di->zhp = zhp;
+
+	di->cleanupfd = open(ZFS_DEV, O_RDWR|O_EXCL);
+	VERIFY(di->cleanupfd >= 0);
+
+	if (get_snapshot_names(di, fromsnap, tosnap) != 0 ||
+	    get_mountpoints(di) != 0 ||
+	    find_shares_object(di) != 0)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Write the differences between two snapshots (or between a snapshot and
+ * the current state of its dataset when tosnap is NULL) to 'outfd'.
+ *
+ * 'flags' may contain ZFS_DIFF_PARSEABLE, ZFS_DIFF_CLASSIFY and
+ * ZFS_DIFF_TIMESTAMP.  A worker thread running differ() formats the
+ * records that ZFS_IOC_DIFF writes into a pipe; that thread wraps 'outfd'
+ * in a stdio stream and closes it when it finishes.
+ *
+ * Returns 0 on success, -1 when setup fails, or the result of zfs_error()
+ * for later failures.
+ */
+int
+zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap,
+    const char *tosnap, int flags)
+{
+	zfs_cmd_t zc = { 0 };
+	char errbuf[1024];
+	differ_info_t di = { 0 };
+	pthread_t tid;
+	int pipefd[2];
+	int iocerr;
+	int err;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "zfs diff failed"));
+
+	if (setup_differ_info(zhp, fromsnap, tosnap, &di)) {
+		teardown_differ_info(&di);
+		return (-1);
+	}
+
+	if (pipe(pipefd)) {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
+		teardown_differ_info(&di);
+		return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf));
+	}
+
+	di.scripted = (flags & ZFS_DIFF_PARSEABLE);
+	di.classify = (flags & ZFS_DIFF_CLASSIFY);
+	di.timestamped = (flags & ZFS_DIFF_TIMESTAMP);
+
+	di.outputfd = outfd;
+	di.datafd = pipefd[0];
+
+	/* pthread_create() returns the error number; it does not set errno */
+	err = pthread_create(&tid, NULL, differ, &di);
+	if (err != 0) {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+		(void) close(pipefd[0]);
+		(void) close(pipefd[1]);
+		teardown_differ_info(&di);
+		return (zfs_error(zhp->zfs_hdl,
+		    EZFS_THREADCREATEFAILED, errbuf));
+	}
+
+	/* do the ioctl(); copies are bounded by the destination buffers */
+	(void) strlcpy(zc.zc_value, di.fromsnap, sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_name, di.tosnap, sizeof (zc.zc_name));
+	zc.zc_cookie = pipefd[1];
+
+	iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc);
+	if (iocerr != 0) {
+		/* save errno before any other call can change it */
+		err = errno;
+
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "Unable to obtain diffs"));
+		if (err == EPERM) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "\n The sys_mount privilege or diff delegated "
+			    "permission is needed\n to execute the "
+			    "diff ioctl"));
+		} else if (err == EXDEV) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "\n Not an earlier snapshot from the same fs"));
+		} else if (err != EPIPE || di.zerr == 0) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
+		}
+		(void) close(pipefd[1]);
+		(void) pthread_cancel(tid);
+		(void) pthread_join(tid, NULL);
+		teardown_differ_info(&di);
+		if (di.zerr != 0 && di.zerr != EPIPE) {
+			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr));
+			return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf));
+		} else {
+			return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf));
+		}
+	}
+
+	(void) close(pipefd[1]);
+	(void) pthread_join(tid, NULL);
+
+	/* di.zerr and di.errbuf are plain members and survive teardown */
+	teardown_differ_info(&di);
+	if (di.zerr != 0) {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr));
+		return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf));
+	}
+	return (0);
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c
new file mode 100644
index 0000000..788fa2c
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_fru.c
@@ -0,0 +1,452 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <libintl.h>
+#include <link.h>
+#include <pthread.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <libzfs.h>
+
+#include <fm/libtopo.h>
+#include <sys/fm/protocol.h>
+#include <sys/systeminfo.h>
+
+#include "libzfs_impl.h"
+
+/*
+ * This file is responsible for determining the relationship between I/O
+ * devices paths and physical locations. In the world of MPxIO and external
+ * enclosures, the device path is not synonymous with the physical location.
+ * If you remove a drive and insert it into a different slot, it will end up
+ * with the same path under MPxIO. If you recable storage enclosures, the
+ * device paths may change. All of this makes it difficult to implement the
+ * 'autoreplace' property, which is supposed to automatically manage disk
+ * replacement based on physical slot.
+ *
+ * In order to work around these limitations, we have a per-vdev FRU property
+ * that is the libtopo path (minus disk-specific authority information) to the
+ * physical location of the device on the system. This is an optional
+ * property, and is only needed when using the 'autoreplace' property or when
+ * generating FMA faults against vdevs.
+ */
+
+/*
+ * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
+ * it is not present. We only need this once per library instance, so it is
+ * not part of the libzfs handle.
+ *
+ * The function pointers below are resolved with dlsym() by
+ * libzfs_init_fru(). _topo_dlhandle is left NULL when libtopo could not be
+ * opened or any one of the symbols is missing, in which case
+ * libzfs_fru_refresh() returns without doing anything.
+ */
+static void *_topo_dlhandle;
+static topo_hdl_t *(*_topo_open)(int, const char *, int *);
+static void (*_topo_close)(topo_hdl_t *);
+static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
+static void (*_topo_snap_release)(topo_hdl_t *);
+static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
+ topo_walk_cb_t, void *, int *);
+static int (*_topo_walk_step)(topo_walk_t *, int);
+static void (*_topo_walk_fini)(topo_walk_t *);
+static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
+static char *(*_topo_node_name)(tnode_t *);
+static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
+ char **, int *);
+static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
+static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
+static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
+ const char *);
+
+#define ZFS_FRU_HASH_SIZE 257 /* number of buckets in libzfs_fru_hash */
+
+/*
+ * Hash a string to a bucket index in the range [0, ZFS_FRU_HASH_SIZE).
+ */
+static size_t
+fru_strhash(const char *key)
+{
+	ulong_t hash = 0;
+	ulong_t high;
+
+	while (*key != '\0') {
+		hash = (hash << 4) + *key++;
+
+		/* fold bits 28-31 back into the low bits, then clear them */
+		high = hash & 0xf0000000;
+		if (high != 0) {
+			hash ^= (high >> 24);
+			hash ^= high;
+		}
+	}
+
+	return (hash % ZFS_FRU_HASH_SIZE);
+}
+
+/*
+ * Topology walker callback used by libzfs_fru_refresh(); 'arg' is the
+ * libzfs handle.  Records the chassis id the first time a "chassis" node
+ * is seen, and for every "disk" node that has both an io/devfs-path
+ * property and a FRU adds a {device path, FRU string} entry to the
+ * handle's hash and list.  Nodes that cannot be processed are skipped;
+ * the walk always continues (TOPO_WALK_NEXT).
+ */
+static int
+libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
+{
+	libzfs_handle_t *hdl = arg;
+	nvlist_t *fru;
+	char *devpath, *frustr;
+	int err;
+	libzfs_fru_t *frup;
+	size_t idx;
+
+	/*
+	 * If this is the chassis node, and we don't yet have the system
+	 * chassis ID, then fill in this value now.
+	 */
+	if (hdl->libzfs_chassis_id[0] == '\0' &&
+	    strcmp(_topo_node_name(tn), "chassis") == 0) {
+		if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
+		    FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) {
+			(void) strlcpy(hdl->libzfs_chassis_id, devpath,
+			    sizeof (hdl->libzfs_chassis_id));
+			/* release it like every other property string here */
+			_topo_hdl_strfree(thp, devpath);
+		}
+	}
+
+	/*
+	 * Skip non-disk nodes.
+	 */
+	if (strcmp(_topo_node_name(tn), "disk") != 0)
+		return (TOPO_WALK_NEXT);
+
+	/*
+	 * Get the devfs path and FRU.
+	 */
+	if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
+		return (TOPO_WALK_NEXT);
+
+	/* already known: keep the existing entry */
+	if (libzfs_fru_lookup(hdl, devpath) != NULL) {
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	/*
+	 * Convert the FRU into a string.
+	 */
+	if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
+		nvlist_free(fru);
+		_topo_hdl_strfree(thp, devpath);
+		return (TOPO_WALK_NEXT);
+	}
+
+	nvlist_free(fru);
+
+	/*
+	 * Finally, we have a FRU string and device path.  Add it to the hash.
+	 */
+	if ((frup = calloc(1, sizeof (libzfs_fru_t))) == NULL) {
+		_topo_hdl_strfree(thp, devpath);
+		_topo_hdl_strfree(thp, frustr);
+		return (TOPO_WALK_NEXT);
+	}
+
+	if ((frup->zf_device = strdup(devpath)) == NULL ||
+	    (frup->zf_fru = strdup(frustr)) == NULL) {
+		free(frup->zf_device);
+		free(frup);
+		_topo_hdl_strfree(thp, devpath);
+		_topo_hdl_strfree(thp, frustr);
+		return (TOPO_WALK_NEXT);
+	}
+
+	_topo_hdl_strfree(thp, devpath);
+	_topo_hdl_strfree(thp, frustr);
+
+	/* push onto both the hash bucket chain and the global list */
+	idx = fru_strhash(frup->zf_device);
+	frup->zf_chain = hdl->libzfs_fru_hash[idx];
+	hdl->libzfs_fru_hash[idx] = frup;
+	frup->zf_next = hdl->libzfs_fru_list;
+	hdl->libzfs_fru_list = frup;
+
+	return (TOPO_WALK_NEXT);
+}
+
+/*
+ * Called during initialization to setup the dynamic libtopo connection.
+ *
+ * Opens /usr/lib/fm/<isa>/libtopo.so with dlopen() and resolves every
+ * libtopo entry point this file uses. <isa> comes from
+ * sysinfo(SI_ARCHITECTURE_64) when _LP64 is defined and is empty otherwise.
+ * If the library cannot be opened, or any symbol is missing, _topo_dlhandle
+ * ends up NULL. NOTE(review): the #pragma init below is presumably what
+ * makes this run when the library is loaded - confirm for the toolchain in
+ * use.
+ */
+#pragma init(libzfs_init_fru)
+static void
+libzfs_init_fru(void)
+{
+ char path[MAXPATHLEN];
+ char isa[257];
+
+#if defined(_LP64)
+ if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
+ isa[0] = '\0';
+#else
+ isa[0] = '\0';
+#endif
+ (void) snprintf(path, sizeof (path),
+ "/usr/lib/fm/%s/libtopo.so", isa);
+
+ if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
+ return;
+
+ _topo_open = (topo_hdl_t *(*)())
+ dlsym(_topo_dlhandle, "topo_open");
+ _topo_close = (void (*)())
+ dlsym(_topo_dlhandle, "topo_close");
+ _topo_snap_hold = (char *(*)())
+ dlsym(_topo_dlhandle, "topo_snap_hold");
+ _topo_snap_release = (void (*)())
+ dlsym(_topo_dlhandle, "topo_snap_release");
+ _topo_walk_init = (topo_walk_t *(*)())
+ dlsym(_topo_dlhandle, "topo_walk_init");
+ _topo_walk_step = (int (*)())
+ dlsym(_topo_dlhandle, "topo_walk_step");
+ _topo_walk_fini = (void (*)())
+ dlsym(_topo_dlhandle, "topo_walk_fini");
+ _topo_hdl_strfree = (void (*)())
+ dlsym(_topo_dlhandle, "topo_hdl_strfree");
+ _topo_node_name = (char *(*)())
+ dlsym(_topo_dlhandle, "topo_node_name");
+ _topo_prop_get_string = (int (*)())
+ dlsym(_topo_dlhandle, "topo_prop_get_string");
+ _topo_node_fru = (int (*)())
+ dlsym(_topo_dlhandle, "topo_node_fru");
+ _topo_fmri_nvl2str = (int (*)())
+ dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
+ _topo_fmri_strcmp_noauth = (int (*)())
+ dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
+
+ /* all or nothing: a single missing symbol disables FRU support */
+ if (_topo_open == NULL || _topo_close == NULL ||
+ _topo_snap_hold == NULL || _topo_snap_release == NULL ||
+ _topo_walk_init == NULL || _topo_walk_step == NULL ||
+ _topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
+ _topo_node_name == NULL || _topo_prop_get_string == NULL ||
+ _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
+ _topo_fmri_strcmp_noauth == NULL) {
+ (void) dlclose(_topo_dlhandle);
+ _topo_dlhandle = NULL;
+ }
+}
+
+/*
+ * Rebuild the device path -> FMRI mappings.  The hc topology is walked
+ * looking for disk nodes, and the io/devfs-path and FRU of each one is
+ * recorded.  Note that we strip out the disk-specific authority
+ * information (serial, part, revision, etc) so that we are left with only
+ * the identifying characteristics of the slot (hc path and chassis-id).
+ *
+ * This is a no-op when libtopo is unavailable, and returns early if the
+ * topology cannot be opened or snapshotted or the hash cannot be
+ * allocated.
+ */
+void
+libzfs_fru_refresh(libzfs_handle_t *hdl)
+{
+	int terr;
+	char *snap_uuid;
+	topo_hdl_t *topo;
+	topo_walk_t *walker;
+
+	if (_topo_dlhandle == NULL)
+		return;
+
+	/* Drop the previous entries and topology handle before starting. */
+	libzfs_fru_clear(hdl, B_FALSE);
+
+	topo = _topo_open(TOPO_VERSION, NULL, &terr);
+	hdl->libzfs_topo_hdl = topo;
+	if (topo == NULL)
+		return;
+
+	snap_uuid = _topo_snap_hold(topo, NULL, &terr);
+	if (snap_uuid == NULL)
+		return;
+	_topo_hdl_strfree(topo, snap_uuid);
+
+	if (hdl->libzfs_fru_hash == NULL) {
+		hdl->libzfs_fru_hash =
+		    calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1);
+		if (hdl->libzfs_fru_hash == NULL)
+			return;
+	}
+
+	/*
+	 * With a topo snapshot in hand, iterate over the hc topology and
+	 * let libzfs_fru_gather() add the disks it finds to the hash.
+	 */
+	walker = _topo_walk_init(topo, FM_FMRI_SCHEME_HC,
+	    libzfs_fru_gather, hdl, &terr);
+	if (walker == NULL)
+		return;
+
+	(void) _topo_walk_step(walker, TOPO_WALK_CHILD);
+	_topo_walk_fini(walker);
+}
+
+/*
+ * Return the FRU recorded for the device with the given devfs path, or
+ * NULL when it is not known.  libzfs_fru_refresh() is called first if the
+ * hash has not been built yet.  The returned string remains valid until
+ * the next call to libzfs_fru_refresh().
+ */
+const char *
+libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
+{
+	libzfs_fru_t *ent;
+
+	if (hdl->libzfs_fru_hash == NULL) {
+		libzfs_fru_refresh(hdl);
+		if (hdl->libzfs_fru_hash == NULL)
+			return (NULL);
+	}
+
+	/* walk the bucket chain the path hashes to */
+	ent = hdl->libzfs_fru_hash[fru_strhash(devpath)];
+	while (ent != NULL && strcmp(devpath, ent->zf_device) != 0)
+		ent = ent->zf_chain;
+
+	return (ent != NULL ? ent->zf_fru : NULL);
+}
+
+/*
+ * Given a fru path, return the device path.  This will automatically call
+ * libzfs_fru_refresh() if it hasn't already been called by the consumer.  The
+ * string returned is valid until the next call to libzfs_fru_refresh().
+ * FRUs are compared with authority information ignored; NULL is returned
+ * when no entry matches.
+ */
+const char *
+libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
+{
+	libzfs_fru_t *frup;
+	size_t idx;
+
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	if (hdl->libzfs_fru_hash == NULL)
+		return (NULL);
+
+	/*
+	 * The hash is keyed by device path, so every bucket has to be
+	 * searched.  Entries within a bucket are linked through zf_chain;
+	 * zf_next links the global list and following it from a bucket
+	 * head would revisit entries that belong to other buckets.
+	 */
+	for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
+		for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
+		    frup = frup->zf_chain) {
+			if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
+			    fru, frup->zf_fru))
+				return (frup->zf_device);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Change the stored FRU for the given vdev.
+ *
+ * Issues ZFS_IOC_VDEV_SETFRU for the vdev identified by vdev_guid in pool
+ * zhp.  Returns 0 on success or the result of zpool_standard_error_fmt().
+ */
+int
+zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
+{
+	zfs_cmd_t zc = { 0 };
+
+	/* strlcpy always NUL-terminates, even when the source is too long */
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, fru, sizeof (zc.zc_value));
+	zc.zc_guid = vdev_guid;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
+		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot set FRU")));
+
+	return (0);
+}
+
+/*
+ * Compare two FRUs, ignoring any authority information.  When no FRU hash
+ * is available even after a refresh, fall back to a plain string
+ * comparison.
+ */
+boolean_t
+libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
+{
+	if (hdl->libzfs_fru_hash == NULL) {
+		libzfs_fru_refresh(hdl);
+		if (hdl->libzfs_fru_hash == NULL)
+			return (strcmp(a, b) == 0);
+	}
+
+	return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
+}
+
+/*
+ * This special function checks to see whether the FRU indicates it's
+ * supposed to be in the system chassis, but the chassis-id doesn't match.
+ * This can happen in a clustered case, where both head nodes have the same
+ * logical disk, but opening the device on the other head node is
+ * meaningless.
+ *
+ * Returns B_TRUE only when the FRU names chassis 0, carries a chassis-id,
+ * and that id differs from this system's; B_FALSE in every other case,
+ * including when this system's chassis id is not known.
+ */
+boolean_t
+libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
+{
+	static const char idkey[] = ":chassis-id=";
+	const char *self;
+	const char *id;
+	size_t selflen;
+
+	if (hdl->libzfs_fru_hash == NULL)
+		libzfs_fru_refresh(hdl);
+
+	self = hdl->libzfs_chassis_id;
+	if (self[0] == '\0')
+		return (B_FALSE);
+
+	if (strstr(fru, "/chassis=0/") == NULL)
+		return (B_FALSE);
+
+	id = strstr(fru, idkey);
+	if (id == NULL)
+		return (B_FALSE);
+	id += sizeof (idkey) - 1;	/* step over the key itself */
+
+	selflen = strlen(self);
+	if (strncmp(id, self, selflen) != 0)
+		return (B_TRUE);
+
+	/* the id must end here, not merely start with our chassis id */
+	if (id[selflen] == '/' || id[selflen] == ':')
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Clear memory associated with the FRU hash.
+ *
+ * Frees every entry and releases the topology snapshot and handle.  When
+ * 'final' is set the bucket array itself is freed as well; otherwise it
+ * is kept and zeroed for reuse.
+ */
+void
+libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
+{
+	libzfs_fru_t *frup;
+
+	while ((frup = hdl->libzfs_fru_list) != NULL) {
+		hdl->libzfs_fru_list = frup->zf_next;
+		free(frup->zf_device);
+		free(frup->zf_fru);
+		free(frup);
+	}
+
+	hdl->libzfs_fru_list = NULL;
+
+	if (hdl->libzfs_topo_hdl != NULL) {
+		_topo_snap_release(hdl->libzfs_topo_hdl);
+		_topo_close(hdl->libzfs_topo_hdl);
+		hdl->libzfs_topo_hdl = NULL;
+	}
+
+	if (final) {
+		free(hdl->libzfs_fru_hash);
+		/*
+		 * The lookup routines test this pointer against NULL, so
+		 * it must not be left pointing at freed memory.
+		 */
+		hdl->libzfs_fru_hash = NULL;
+	} else if (hdl->libzfs_fru_hash != NULL) {
+		bzero(hdl->libzfs_fru_hash,
+		    ZFS_FRU_HASH_SIZE * sizeof (void *));
+	}
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
index 0642033..9d1ecb7 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LIBFS_IMPL_H
@@ -30,7 +29,6 @@
#include <sys/dmu.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
-#include <sys/zfs_acl.h>
#include <sys/spa.h>
#include <sys/nvpair.h>
@@ -38,6 +36,8 @@
#include <libuutil.h>
#include <libzfs.h>
+#include "zfs_ioctl_compat.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -47,6 +47,13 @@ extern "C" {
#endif
#define VERIFY verify
+typedef struct libzfs_fru {
+ char *zf_device; /* released with free() in libzfs_fru_clear() */
+ char *zf_fru; /* released with free() in libzfs_fru_clear() */
+ struct libzfs_fru *zf_chain; /* presumably the hash-bucket chain -- TODO confirm */
+ struct libzfs_fru *zf_next; /* next entry on hdl->libzfs_fru_list */
+} libzfs_fru_t;
+
struct libzfs_handle {
int libzfs_error;
int libzfs_fd;
@@ -61,11 +68,17 @@ struct libzfs_handle {
char libzfs_desc[1024];
char *libzfs_log_str;
int libzfs_printerr;
+ int libzfs_storeerr; /* stuff error messages into buffer */
void *libzfs_sharehdl; /* libshare handle */
uint_t libzfs_shareflags;
boolean_t libzfs_mnttab_enable;
avl_tree_t libzfs_mnttab_cache;
+ int libzfs_pool_iter;
+ libzfs_fru_t **libzfs_fru_hash;
+ libzfs_fru_t *libzfs_fru_list;
+ char libzfs_chassis_id[256];
};
+
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
struct zfs_handle {
@@ -77,6 +90,7 @@ struct zfs_handle {
dmu_objset_stats_t zfs_dmustats;
nvlist_t *zfs_props;
nvlist_t *zfs_user_props;
+ nvlist_t *zfs_recvd_props;
boolean_t zfs_mntcheck;
char *zfs_mntopts;
uint8_t *zfs_props_table;
@@ -112,7 +126,6 @@ typedef enum {
*/
typedef enum {
SHARED_NOT_SHARED = 0x0,
- SHARED_ISCSI = 0x1,
SHARED_NFS = 0x2,
SHARED_SMB = 0x4
} zfs_share_type_t;
@@ -122,6 +135,7 @@ int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
void zfs_error_aux(libzfs_handle_t *, const char *, ...);
void *zfs_alloc(libzfs_handle_t *, size_t);
void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t);
+char *zfs_asprintf(libzfs_handle_t *, const char *, ...);
char *zfs_strdup(libzfs_handle_t *, const char *);
int no_memory(libzfs_handle_t *);
@@ -172,11 +186,11 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
-int zvol_create_link(libzfs_handle_t *, const char *);
-int zvol_remove_link(libzfs_handle_t *, const char *);
-int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
+int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
+ boolean_t modifying);
+
void namespace_clear(libzfs_handle_t *);
/*
@@ -190,7 +204,10 @@ extern int zfs_parse_options(char *, zfs_share_proto_t);
extern int zfs_unshare_proto(zfs_handle_t *,
const char *, zfs_share_proto_t *);
-#ifdef __FreeBSD__
+extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
+
+#ifndef sun
+static int zfs_kernel_version = 0;
/*
* This is FreeBSD version of ioctl, because Solaris' ioctl() updates
@@ -200,11 +217,23 @@ extern int zfs_unshare_proto(zfs_handle_t *,
static __inline int
zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
{
- size_t oldsize;
- int ret;
+ size_t oldsize, zfs_kernel_version_size;
+ int version, ret, cflag = ZFS_CMD_COMPAT_NONE;
+
+ zfs_kernel_version_size = sizeof(zfs_kernel_version);
+ if (zfs_kernel_version == 0) {
+ sysctlbyname("vfs.zfs.version.spa", &zfs_kernel_version,
+ &zfs_kernel_version_size, NULL, 0);
+ }
+
+ if (zfs_kernel_version == SPA_VERSION_15 ||
+ zfs_kernel_version == SPA_VERSION_14 ||
+ zfs_kernel_version == SPA_VERSION_13)
+ cflag = ZFS_CMD_COMPAT_V15;
oldsize = zc->zc_nvlist_dst_size;
- ret = ioctl(fd, cmd, zc);
+ ret = zcmd_ioctl_compat(fd, cmd, zc, cflag);
+
if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
ret = -1;
errno = ENOMEM;
@@ -213,7 +242,7 @@ zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
return (ret);
}
#define ioctl(fd, cmd, zc) zcmd_ioctl((fd), (cmd), (zc))
-#endif
+#endif /* !sun */
#ifdef __cplusplus
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
index 166c831..4c31e56 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
@@ -19,12 +19,9 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Pool import support functions.
*
@@ -41,15 +38,18 @@
* using our derived config, and record the results.
*/
+#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
+#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
+#include <thread_pool.h>
#include <libgeom.h>
#include <sys/vdev_impl.h>
@@ -113,6 +113,7 @@ get_devid(const char *path)
return (ret);
}
+
/*
* Go through and fix up any path and/or devid information for the given vdev
* configuration.
@@ -388,8 +389,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
}
if (err) {
- (void) zpool_standard_error(hdl, errno,
- dgettext(TEXT_DOMAIN, "cannot discover pools"));
zcmd_free_nvlists(&zc);
return (NULL);
}
@@ -404,6 +403,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
}
/*
+ * Determine if the vdev id is a hole in the namespace.
+ */
+boolean_t
+vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
+{
+ /*
+ * Scan the 'holes' entries of hole_array for the given top-level id.
+ * The index is unsigned so that it compares cleanly against 'holes'.
+ */
+ for (uint_t c = 0; c < holes; c++) {
+ /* Top-level is a hole */
+ if (hole_array[c] == id)
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+/*
* Convert our list of pools into the definitive set of configurations. We
* start by picking the best config for each toplevel vdev. Once that's done,
* we assemble the toplevel vdevs into a full config for the pool. We make a
@@ -425,17 +439,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
uint64_t version, guid;
uint_t children = 0;
nvlist_t **child = NULL;
+ uint_t holes;
+ uint64_t *hole_array, max_id;
uint_t c;
boolean_t isactive;
uint64_t hostid;
nvlist_t *nvl;
boolean_t found_one = B_FALSE;
+ boolean_t valid_top_config = B_FALSE;
if (nvlist_alloc(&ret, 0, 0) != 0)
goto nomem;
for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
- uint64_t id;
+ uint64_t id, max_txg = 0;
if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
goto nomem;
@@ -463,6 +480,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
}
+ /*
+ * We rely on the fact that the max txg for the
+ * pool will contain the most up-to-date information
+ * about the valid top-levels in the vdev namespace.
+ */
+ if (best_txg > max_txg) {
+ (void) nvlist_remove(config,
+ ZPOOL_CONFIG_VDEV_CHILDREN,
+ DATA_TYPE_UINT64);
+ (void) nvlist_remove(config,
+ ZPOOL_CONFIG_HOLE_ARRAY,
+ DATA_TYPE_UINT64_ARRAY);
+
+ max_txg = best_txg;
+ hole_array = NULL;
+ holes = 0;
+ max_id = 0;
+ valid_top_config = B_FALSE;
+
+ if (nvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
+ verify(nvlist_add_uint64(config,
+ ZPOOL_CONFIG_VDEV_CHILDREN,
+ max_id) == 0);
+ valid_top_config = B_TRUE;
+ }
+
+ if (nvlist_lookup_uint64_array(tmp,
+ ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
+ &holes) == 0) {
+ verify(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_HOLE_ARRAY,
+ hole_array, holes) == 0);
+ }
+ }
+
if (!config_seen) {
/*
* Copy the relevant pieces of data to the pool
@@ -522,6 +575,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
&id) == 0);
+
if (id >= children) {
nvlist_t **newchild;
@@ -542,17 +596,82 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
+ /*
+ * If we have information about all the top-levels then
+ * clean up the nvlist which we've constructed. This
+ * means removing any extraneous devices that are
+ * beyond the valid range or adding devices to the end
+ * of our array which appear to be missing.
+ */
+ if (valid_top_config) {
+ if (max_id < children) {
+ for (c = max_id; c < children; c++)
+ nvlist_free(child[c]);
+ children = max_id;
+ } else if (max_id > children) {
+ nvlist_t **newchild;
+
+ newchild = zfs_alloc(hdl, (max_id) *
+ sizeof (nvlist_t *));
+ if (newchild == NULL)
+ goto nomem;
+
+ for (c = 0; c < children; c++)
+ newchild[c] = child[c];
+
+ free(child);
+ child = newchild;
+ children = max_id;
+ }
+ }
+
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&guid) == 0);
/*
+ * The vdev namespace may contain holes as a result of
+ * device removal. We must add them back into the vdev
+ * tree before we process any missing devices.
+ */
+ if (holes > 0) {
+ ASSERT(valid_top_config);
+
+ for (c = 0; c < children; c++) {
+ nvlist_t *holey;
+
+ if (child[c] != NULL ||
+ !vdev_is_hole(hole_array, holes, c))
+ continue;
+
+ if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
+ 0) != 0)
+ goto nomem;
+
+ /*
+ * Holes in the namespace are treated as
+ * "hole" top-level vdevs and have a
+ * special flag set on them.
+ */
+ if (nvlist_add_string(holey,
+ ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_HOLE) != 0 ||
+ nvlist_add_uint64(holey,
+ ZPOOL_CONFIG_ID, c) != 0 ||
+ nvlist_add_uint64(holey,
+ ZPOOL_CONFIG_GUID, 0ULL) != 0)
+ goto nomem;
+ child[c] = holey;
+ }
+ }
+
+ /*
* Look for any missing top-level vdevs. If this is the case,
* create a faked up 'missing' vdev as a placeholder. We cannot
* simply compress the child array, because the kernel performs
* certain checks to make sure the vdev IDs match their location
* in the configuration.
*/
- for (c = 0; c < children; c++)
+ for (c = 0; c < children; c++) {
if (child[c] == NULL) {
nvlist_t *missing;
if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
@@ -570,6 +689,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
child[c] = missing;
}
+ }
/*
* Put all of this pool's top-level vdevs into a root vdev.
@@ -636,8 +756,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
continue;
}
- if ((nvl = refresh_config(hdl, config)) == NULL)
- goto error;
+ if ((nvl = refresh_config(hdl, config)) == NULL) {
+ nvlist_free(config);
+ config = NULL;
+ continue;
+ }
nvlist_free(config);
config = nvl;
@@ -777,56 +900,216 @@ zpool_read_label(int fd, nvlist_t **config)
return (0);
}
+typedef struct rdsk_node {
+ char *rn_name; /* device name; also the AVL sort key */
+ int rn_dfd; /* directory fd the name is opened relative to */
+ libzfs_handle_t *rn_hdl; /* handle used for error reporting */
+ nvlist_t *rn_config; /* label config stored by zpool_open_func() */
+ avl_tree_t *rn_avl; /* slice cache tree this node is added to */
+ avl_node_t rn_node; /* AVL linkage (offset passed to avl_create) */
+ boolean_t rn_nozpool; /* B_TRUE: zpool_open_func() skips this node */
+} rdsk_node_t;
+
static int
-geom_find_import(libzfs_handle_t *hdl, pool_list_t *pools)
+slice_cache_compare(const void *arg1, const void *arg2)
{
- char path[MAXPATHLEN];
- struct gmesh mesh;
- struct gclass *mp;
- struct ggeom *gp;
- struct gprovider *pp;
+ /*
+ * AVL comparator for rdsk_node_t entries, keyed on rn_name.
+ *
+ * slices zero and two are the most likely to provide results,
+ * so put those first
+ */
+ static const char *const preferred[] = { "s0", "s2" };
+ const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
+ const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
+ int i, rv;
+
+ for (i = 0; i < 2; i++) {
+ int in1 = (strstr(nm1, preferred[i]) != NULL);
+ int in2 = (strstr(nm2, preferred[i]) != NULL);
+
+ /* only one of the names carries this marker: it sorts first */
+ if (in1 != in2)
+ return (in1 ? -1 : 1);
+ }
+
+ /* otherwise fall back to plain name order, normalised to -1/0/1 */
+ rv = strcmp(nm1, nm2);
+ if (rv != 0)
+ rv = (rv > 0) ? 1 : -1;
+ return (rv);
+}
+
+#ifdef sun
+/*
+ * Look up the cached node named "<diskname><partno>" and mark it as
+ * unable to hold a zpool when 'size' is below SPA_MINDEVSIZE / blksz.
+ * Names that are not in the tree are ignored.
+ */
+static void
+check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
+ diskaddr_t size, uint_t blksz)
+{
+ char sname[MAXNAMELEN];
+ rdsk_node_t key;
+ rdsk_node_t *hit;
+
+ (void) snprintf(sname, sizeof (sname), "%s%u", diskname, partno);
+ key.rn_name = sname;
+
+ /* a label with a bogus sector size must not make us divide by zero */
+ if (blksz == 0)
+ blksz = DEV_BSIZE;
+
+ /* big enough to contain a zpool: nothing to record */
+ if (size >= (SPA_MINDEVSIZE / blksz))
+ return;
+
+ hit = avl_find(r, &key, NULL);
+ if (hit != NULL)
+ hit->rn_nozpool = B_TRUE;
+}
+#endif /* sun */
+
+/*
+ * Run check_one_slice() with a size of zero over every "s<N>" and "p<N>"
+ * name derived from 'sname', so that each such node found in the tree is
+ * flagged as unable to hold a zpool. Only does work when built for sun;
+ * elsewhere the body is compiled out.
+ */
+static void
+nozpool_all_slices(avl_tree_t *r, const char *sname)
+{
+#ifdef sun
+ char diskname[MAXNAMELEN];
+ char *ptr;
+ int i;
+
+ /* strlcpy, unlike strncpy, always NUL-terminates the copy */
+ (void) strlcpy(diskname, sname, sizeof (diskname));
+ if (((ptr = strrchr(diskname, 's')) == NULL) &&
+ ((ptr = strrchr(diskname, 'p')) == NULL))
+ return;
+ /* cut the name after the last 's' (or 'p') and append each index */
+ ptr[0] = 's';
+ ptr[1] = '\0';
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+ ptr[0] = 'p';
+ for (i = 0; i <= FD_NUMPART; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+#endif /* sun */
+}
+
+/*
+ * Read the disk label through 'fd' (VTOC first, then EFI) and pass each
+ * slice size it reports to check_one_slice(), which flags the sibling
+ * nodes of 'sname' that are too small for a zpool. Only does work when
+ * built for sun; elsewhere the body is compiled out.
+ */
+static void
+check_slices(avl_tree_t *r, int fd, const char *sname)
+{
+#ifdef sun
+ struct extvtoc vtoc;
+ struct dk_gpt *gpt;
+ char diskname[MAXNAMELEN];
+ char *ptr;
+ int i;
+
+ /* strlcpy, unlike strncpy, always NUL-terminates the copy */
+ (void) strlcpy(diskname, sname, sizeof (diskname));
+ /* isdigit() needs a value representable as unsigned char */
+ if ((ptr = strrchr(diskname, 's')) == NULL ||
+ !isdigit((unsigned char)ptr[1]))
+ return;
+ ptr[1] = '\0';
+
+ if (read_extvtoc(fd, &vtoc) >= 0) {
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i,
+ vtoc.v_part[i].p_size, vtoc.v_sectorsz);
+ } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
+ /*
+ * on x86 we'll still have leftover links that point
+ * to slices s[9-15], so use NDKMAP instead
+ */
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i,
+ gpt->efi_parts[i].p_size, gpt->efi_lbasize);
+ /* nodes p[1-4] are never used with EFI labels */
+ ptr[0] = 'p';
+ for (i = 1; i <= FD_NUMPART; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+ efi_free(gpt);
+ }
+#endif /* sun */
+}
+
+static void
+zpool_open_func(void *arg)
+{
+ rdsk_node_t *rn = arg; /* slice-cache node handed over by tpool_dispatch() */
+ struct stat64 statbuf;
nvlist_t *config;
- int fd, ret = 0;
+ int fd;
+ if (rn->rn_nozpool) /* flag set by check_one_slice(); nothing to read */
+ return; /* NOTE(review): rn_config is left untouched on early returns -- confirm nodes start zeroed */
+ if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
+ /* symlink to a device that's no longer there */
+ if (errno == ENOENT)
+ nozpool_all_slices(rn->rn_avl, rn->rn_name);
+ return;
+ }
/*
- * Go through and read the label configuration information from every
- * GEOM provider, organizing the information according to pool GUID
- * and toplevel GUID.
+ * Ignore failed stats. We only want regular
+ * files, character devs and block devs.
*/
+ if (fstat64(fd, &statbuf) != 0 ||
+ (!S_ISREG(statbuf.st_mode) &&
+ !S_ISCHR(statbuf.st_mode) &&
+ !S_ISBLK(statbuf.st_mode))) {
+ (void) close(fd);
+ return;
+ }
+ /* this file is too small to hold a zpool */
+ if (S_ISREG(statbuf.st_mode) &&
+ statbuf.st_size < SPA_MINDEVSIZE) {
+ (void) close(fd);
+ return;
+ } else if (!S_ISREG(statbuf.st_mode)) {
+ /*
+ * Try to read the disk label first so we don't have to
+ * open a bunch of minor nodes that can't have a zpool.
+ */
+ check_slices(rn->rn_avl, fd, rn->rn_name);
+ }
- fd = geom_gettree(&mesh);
- assert(fd == 0);
+ if ((zpool_read_label(fd, &config)) != 0) { /* any failure is reported as out of memory */
+ (void) close(fd);
+ (void) no_memory(rn->rn_hdl);
+ return;
+ }
+ (void) close(fd);
- LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
- LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
- LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
- if ((fd = g_open(pp->lg_name, 0)) < 0)
- continue;
- (void) snprintf(path, sizeof (path), "%s%s",
- _PATH_DEV, pp->lg_name);
+ rn->rn_config = config; /* NULL is possible; the consumer tests for it */
+ if (config != NULL) {
+ assert(rn->rn_nozpool == B_FALSE);
+ }
+}
- if ((zpool_read_label(fd, &config)) != 0) {
- (void) g_close(fd);
- (void) no_memory(hdl);
- goto error;
- }
+/*
+ * Given a file descriptor, clear (zero) the label information. This function
+ * is currently only used in the appliance stack as part of the ZFS sysevent
+ * module.
+ *
+ * Returns 0 on success, or when the descriptor cannot be stat'ed, and -1
+ * if the zeroed buffer cannot be allocated or any label write comes up
+ * short. The buffer is released on every path.
+ */
+int
+zpool_clear_label(int fd)
+{
+ struct stat64 statbuf;
+ int l;
+ vdev_label_t *label;
+ uint64_t size;
- (void) g_close(fd);
+ if (fstat64(fd, &statbuf) == -1)
+ return (0);
+ size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
- if (config == NULL)
- continue;
+ if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
+ return (-1);
- if (add_config(hdl, pools, path, config) != 0) {
- ret = -1;
- goto error;
- }
- }
- }
+ for (l = 0; l < VDEV_LABELS; l++) {
+ if (pwrite64(fd, label, sizeof (vdev_label_t),
+ label_offset(size, l)) != sizeof (vdev_label_t)) {
+ /* do not leak the zeroed buffer on a failed write */
+ free(label);
+ return (-1);
+ }
}
-error:
- geom_deletetree(&mesh);
- return (ret);
+
+ free(label);
+ return (0);
}
/*
@@ -837,30 +1120,28 @@ error:
* to import a specific pool.
*/
static nvlist_t *
-zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
- boolean_t active_ok, char *poolname, uint64_t guid)
+zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
{
- int i;
+ int i, dirs = iarg->paths;
DIR *dirp = NULL;
struct dirent64 *dp;
char path[MAXPATHLEN];
- char *end;
+ char *end, **dir = iarg->path;
size_t pathleft;
- struct stat64 statbuf;
- nvlist_t *ret = NULL, *config;
+ nvlist_t *ret = NULL;
static char *default_dir = "/dev/dsk";
- int fd;
pool_list_t pools = { 0 };
pool_entry_t *pe, *penext;
vdev_entry_t *ve, *venext;
config_entry_t *ce, *cenext;
name_entry_t *ne, *nenext;
+ avl_tree_t slice_cache;
+ rdsk_node_t *slice;
+ void *cookie;
- verify(poolname == NULL || guid == 0);
-
- if (argc == 0) {
- argc = 1;
- argv = &default_dir;
+ if (dirs == 0) {
+ dirs = 1;
+ dir = &default_dir;
}
/*
@@ -868,15 +1149,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
* possible device, organizing the information according to pool GUID
* and toplevel GUID.
*/
- for (i = 0; i < argc; i++) {
+ for (i = 0; i < dirs; i++) {
+ tpool_t *t;
char *rdsk;
int dfd;
/* use realpath to normalize the path */
- if (realpath(argv[i], path) == 0) {
+ if (realpath(dir[i], path) == 0) {
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"),
- argv[i]);
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
goto error;
}
end = &path[strlen(path)];
@@ -884,22 +1165,18 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
*end = 0;
pathleft = &path[sizeof (path)] - end;
- if (strcmp(argv[i], default_dir) == 0) {
- geom_find_import(hdl, &pools);
- continue;
- }
-
/*
* Using raw devices instead of block devices when we're
* reading the labels skips a bunch of slow operations during
* close(2) processing, so we replace /dev/dsk with /dev/rdsk.
*/
if (strcmp(path, "/dev/dsk/") == 0)
- rdsk = "/dev/rdsk/";
+ rdsk = "/dev/";
else
rdsk = path;
- if ((dirp = opendir(rdsk)) == NULL) {
+ if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
+ (dirp = fdopendir(dfd)) == NULL) {
zfs_error_aux(hdl, strerror(errno));
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
@@ -907,6 +1184,41 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
goto error;
}
+ avl_create(&slice_cache, slice_cache_compare,
+ sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
+
+ if (strcmp(rdsk, "/dev/") == 0) {
+ struct gmesh mesh;
+ struct gclass *mp;
+ struct ggeom *gp;
+ struct gprovider *pp;
+
+ errno = geom_gettree(&mesh);
+ if (errno != 0) {
+ zfs_error_aux(hdl, strerror(errno));
+ (void) zfs_error_fmt(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "cannot get GEOM tree"));
+ goto error;
+ }
+
+ LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
+ LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
+ LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
+ slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+ slice->rn_name = zfs_strdup(hdl, pp->lg_name);
+ slice->rn_avl = &slice_cache;
+ slice->rn_dfd = dfd;
+ slice->rn_hdl = hdl;
+ slice->rn_nozpool = B_FALSE;
+ avl_add(&slice_cache, slice);
+ }
+ }
+ }
+
+ geom_deletetree(&mesh);
+ goto skipdir;
+ }
+
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
@@ -916,49 +1228,54 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
- (void) snprintf(path, sizeof (path), "%s/%s", rdsk,
- dp->d_name);
-
- if ((fd = open64(path, O_RDONLY)) < 0)
- continue;
-
- /*
- * Ignore failed stats. We only want regular
- * files, character devs and block devs.
- */
- if (fstat64(fd, &statbuf) != 0 ||
- (!S_ISREG(statbuf.st_mode) &&
- !S_ISCHR(statbuf.st_mode) &&
- !S_ISBLK(statbuf.st_mode))) {
- (void) close(fd);
- continue;
- }
-
- if ((zpool_read_label(fd, &config)) != 0) {
- (void) close(fd);
- (void) no_memory(hdl);
- goto error;
- }
-
- (void) close(fd);
-
- if (config != NULL) {
+ slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+ slice->rn_name = zfs_strdup(hdl, name);
+ slice->rn_avl = &slice_cache;
+ slice->rn_dfd = dfd;
+ slice->rn_hdl = hdl;
+ slice->rn_nozpool = B_FALSE;
+ avl_add(&slice_cache, slice);
+ }
+skipdir:
+ /*
+ * create a thread pool to do all of this in parallel;
+ * rn_nozpool is not protected, so this is racy in that
+ * multiple tasks could decide that the same slice can
+ * not hold a zpool, which is benign. Also choose
+ * double the number of processors; we hold a lot of
+ * locks in the kernel, so going beyond this doesn't
+ * buy us much.
+ */
+ t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
+ 0, NULL);
+ for (slice = avl_first(&slice_cache); slice;
+ (slice = avl_walk(&slice_cache, slice,
+ AVL_AFTER)))
+ (void) tpool_dispatch(t, zpool_open_func, slice);
+ tpool_wait(t);
+ tpool_destroy(t);
+
+ cookie = NULL;
+ while ((slice = avl_destroy_nodes(&slice_cache,
+ &cookie)) != NULL) {
+ if (slice->rn_config != NULL) {
+ nvlist_t *config = slice->rn_config;
boolean_t matched = B_TRUE;
- if (poolname != NULL) {
+ if (iarg->poolname != NULL) {
char *pname;
matched = nvlist_lookup_string(config,
ZPOOL_CONFIG_POOL_NAME,
&pname) == 0 &&
- strcmp(poolname, pname) == 0;
- } else if (guid != 0) {
+ strcmp(iarg->poolname, pname) == 0;
+ } else if (iarg->guid != 0) {
uint64_t this_guid;
matched = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_POOL_GUID,
&this_guid) == 0 &&
- guid == this_guid;
+ iarg->guid == this_guid;
}
if (!matched) {
nvlist_free(config);
@@ -966,17 +1283,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
continue;
}
/* use the non-raw path for the config */
- (void) strlcpy(end, name, pathleft);
+ (void) strlcpy(end, slice->rn_name, pathleft);
if (add_config(hdl, &pools, path, config) != 0)
goto error;
}
+ free(slice->rn_name);
+ free(slice);
}
+ avl_destroy(&slice_cache);
(void) closedir(dirp);
dirp = NULL;
}
- ret = get_configs(hdl, &pools, active_ok);
+ ret = get_configs(hdl, &pools, iarg->can_be_active);
error:
for (pe = pools.pools; pe != NULL; pe = penext) {
@@ -1010,27 +1330,12 @@ error:
nvlist_t *
zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
{
- return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
-}
+ importargs_t iarg = { 0 }; /* zeroed: no poolname or guid filter, can_be_active clear */
-nvlist_t *
-zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
- char *pool)
-{
- return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
-}
+ iarg.paths = argc; /* number of directories to search */
+ iarg.path = argv; /* the directories themselves */
-nvlist_t *
-zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
- uint64_t guid)
-{
- return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
-}
-
-nvlist_t *
-zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
-{
- return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
+ return (zpool_find_import_impl(hdl, &iarg));
}
/*
@@ -1152,6 +1457,46 @@ zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
return (pools);
}
+/*
+ * zpool_iter() callback: returns 1 when the pool behind 'zhp' has the
+ * name (or, if no name was requested, the guid) given in the importargs_t
+ * passed as 'data', and 0 otherwise. The handle is closed before returning.
+ */
+static int
+name_or_guid_exists(zpool_handle_t *zhp, void *data)
+{
+ importargs_t *import = data;
+ nvlist_t *cfg = zhp->zpool_config;
+ int match;
+
+ if (import->poolname == NULL) {
+ uint64_t pool_guid;
+
+ verify(nvlist_lookup_uint64(cfg,
+ ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
+ match = (pool_guid == import->guid);
+ } else {
+ char *pool_name;
+
+ verify(nvlist_lookup_string(cfg,
+ ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
+ match = (strcmp(pool_name, import->poolname) == 0);
+ }
+
+ zpool_close(zhp);
+ return (match);
+}
+
+/*
+ * Search for importable pools as described by 'import'. At most one of
+ * poolname and guid may be set. When 'unique' is requested, 'exists'
+ * first records the zpool_iter() result for name_or_guid_exists(). The
+ * search then goes through zpool_find_import_cached() if a cachefile is
+ * given, and through zpool_find_import_impl() otherwise.
+ */
+nvlist_t *
+zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
+{
+ nvlist_t *found;
+
+ verify(import->poolname == NULL || import->guid == 0);
+
+ if (import->unique) {
+ import->exists = zpool_iter(hdl, name_or_guid_exists,
+ import);
+ }
+
+ if (import->cachefile == NULL) {
+ found = zpool_find_import_impl(hdl, import);
+ } else {
+ found = zpool_find_import_cached(hdl, import->cachefile,
+ import->poolname, import->guid);
+ }
+
+ return (found);
+}
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
@@ -1251,6 +1596,17 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
switch (stateval) {
case POOL_STATE_EXPORTED:
+ /*
+ * A pool with an exported state may in fact be imported
+ * read-only, so check the in-core state to see if it's
+ * active and imported read-only. If it is, set
+ * its state to active.
+ */
+ if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
+ (zhp = zpool_open_canfail(hdl, name)) != NULL &&
+ zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
+ stateval = POOL_STATE_ACTIVE;
+
ret = B_TRUE;
break;
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
index 56c0968..b2959dd 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -44,17 +43,14 @@
*
* zfs_is_shared_nfs()
* zfs_is_shared_smb()
- * zfs_is_shared_iscsi()
* zfs_share_proto()
* zfs_shareall();
- * zfs_share_iscsi()
* zfs_unshare_nfs()
* zfs_unshare_smb()
* zfs_unshareall_nfs()
* zfs_unshareall_smb()
* zfs_unshareall()
* zfs_unshareall_bypath()
- * zfs_unshare_iscsi()
*
* The following functions are available for pool consumers, and will
* mount/unmount and share/unshare all datasets within pool:
@@ -82,18 +78,12 @@
#include "libzfs_impl.h"
#include <libshare.h>
-
#define MAXISALEN 257 /* based on sysinfo(2) man page */
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
zfs_share_proto_t);
-static int (*iscsitgt_zfs_share)(const char *);
-static int (*iscsitgt_zfs_unshare)(const char *);
-static int (*iscsitgt_zfs_is_shared)(const char *);
-static int (*iscsitgt_svc_online)();
-
/*
* The share protocols table must be in the same order as the zfs_share_prot_t
* enum in libzfs_impl.h
@@ -125,29 +115,6 @@ zfs_share_proto_t share_all_proto[] = {
PROTO_END
};
-#pragma init(zfs_iscsi_init)
-static void
-zfs_iscsi_init(void)
-{
- void *libiscsitgt;
-
- if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
- RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
- (iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_share")) == NULL ||
- (iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_unshare")) == NULL ||
- (iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_zfs_is_shared")) == NULL ||
- (iscsitgt_svc_online = (int (*)(const char *))dlsym(libiscsitgt,
- "iscsitgt_svc_online")) == NULL) {
- iscsitgt_zfs_share = NULL;
- iscsitgt_zfs_unshare = NULL;
- iscsitgt_zfs_is_shared = NULL;
- iscsitgt_svc_online = NULL;
- }
-}
-
/*
* Search the sharetab for the given mountpoint and protocol, returning
* a zfs_share_type_t value.
@@ -171,7 +138,7 @@ is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
*tab = '\0';
if (strcmp(buf, mountpoint) == 0) {
-#if defined(sun)
+#ifdef sun
/*
* the protocol field is the third field
* skip over second field
@@ -204,7 +171,7 @@ is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
return (SHARED_NOT_SHARED);
}
-#if 0
+#ifdef sun
/*
* Returns true if the specified directory is empty. If we can't open the
* directory at all, return true so that the mount can fail with a more
@@ -309,6 +276,12 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
else
(void) strlcpy(mntopts, options, sizeof (mntopts));
+ /*
+ * If the pool is imported read-only then all mounts must be read-only
+ */
+ if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
+ flags |= MS_RDONLY;
+
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
return (0);
@@ -323,7 +296,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
}
}
-#if 0 /* FreeBSD: overlay mounts are not checked. */
+#ifdef sun /* FreeBSD: overlay mounts are not checked. */
/*
* Determine if the mountpoint is empty. If so, refuse to perform the
* mount. We don't perform this check if MS_OVERLAY is specified, which
@@ -354,6 +327,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
} else if (errno == EPERM) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Insufficient privileges"));
+ } else if (errno == ENOTSUP) {
+ char buf[256];
+ int spa_version;
+
+ VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
+ (void) snprintf(buf, sizeof (buf),
+ dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
+ "file system on a version %d pool. Pool must be"
+ " upgraded to mount this file system."),
+ (u_longlong_t)zfs_prop_get_int(zhp,
+ ZFS_PROP_VERSION), spa_version);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
} else {
zfs_error_aux(hdl, strerror(errno));
}
@@ -374,7 +359,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
static int
unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
{
- if (unmount(mountpoint, flags) != 0) {
+ if (umount2(mountpoint, flags) != 0) {
zfs_error_aux(hdl, strerror(errno));
return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
@@ -454,7 +439,7 @@ zfs_is_shared(zfs_handle_t *zhp)
zfs_share_proto_t *curr_proto;
if (ZFS_IS_VOLUME(zhp))
- return (zfs_is_shared_iscsi(zhp));
+ return (B_FALSE);
for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
curr_proto++)
@@ -466,18 +451,14 @@ zfs_is_shared(zfs_handle_t *zhp)
int
zfs_share(zfs_handle_t *zhp)
{
- if (ZFS_IS_VOLUME(zhp))
- return (zfs_share_iscsi(zhp));
-
+ assert(!ZFS_IS_VOLUME(zhp)); /* callers must not pass a volume */
return (zfs_share_proto(zhp, share_all_proto));
}
int
zfs_unshare(zfs_handle_t *zhp)
{
- if (ZFS_IS_VOLUME(zhp))
- return (zfs_unshare_iscsi(zhp));
-
+ assert(!ZFS_IS_VOLUME(zhp)); /* callers must not pass a volume */
return (zfs_unshareall(zhp));
}
@@ -525,7 +506,7 @@ zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
* initialized in _zfs_init_libshare() are actually present.
*/
-#if 0
+#ifdef sun
static sa_handle_t (*_sa_init)(int);
static void (*_sa_fini)(sa_handle_t);
static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
@@ -552,7 +533,7 @@ static void (*_sa_update_sharetab_ts)(sa_handle_t);
static void
_zfs_init_libshare(void)
{
-#if 0
+#ifdef sun
void *libshare;
char path[MAXPATHLEN];
char isa[MAXISALEN];
@@ -623,7 +604,7 @@ zfs_init_libshare(libzfs_handle_t *zhandle, int service)
{
int ret = SA_OK;
-#if 0
+#ifdef sun
if (_sa_init == NULL)
ret = SA_CONFIG_ERR;
@@ -664,7 +645,7 @@ void
zfs_uninit_libshare(libzfs_handle_t *zhandle)
{
if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
-#if 0
+#ifdef sun
if (_sa_fini != NULL)
_sa_fini(zhandle->libzfs_sharehdl);
#endif
@@ -681,7 +662,7 @@ zfs_uninit_libshare(libzfs_handle_t *zhandle)
int
zfs_parse_options(char *options, zfs_share_proto_t proto)
{
-#if 0
+#ifdef sun
if (_sa_parse_legacy_options != NULL) {
return (_sa_parse_legacy_options(NULL, options,
proto_table[proto].p_name));
@@ -692,7 +673,7 @@ zfs_parse_options(char *options, zfs_share_proto_t proto)
#endif
}
-#if 0
+#ifdef sun
/*
* zfs_sa_find_share(handle, path)
*
@@ -734,7 +715,7 @@ zfs_sa_disable_share(sa_share_t share, char *proto)
return (_sa_disable_share(share, proto));
return (SA_CONFIG_ERR);
}
-#endif
+#endif /* sun */
/*
* Share the given filesystem according to the options in the specified
@@ -755,6 +736,16 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
return (0);
+#ifdef sun
+ if ((ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
+ (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+ dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
+ zfs_get_name(zhp), _sa_errorstr != NULL ?
+ _sa_errorstr(ret) : "");
+ return (-1);
+ }
+#endif
+
for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
/*
* Return success if there are no share options.
@@ -774,13 +765,7 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
continue;
- if (*curr_proto != PROTO_NFS) {
- fprintf(stderr, "Unsupported share protocol: %d.\n",
- *curr_proto);
- continue;
- }
-
-#if 0
+#ifdef sun
share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
if (share == NULL) {
/*
@@ -819,6 +804,12 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
}
} else
#else
+ if (*curr_proto != PROTO_NFS) {
+ fprintf(stderr, "Unsupported share protocol: %d.\n",
+ *curr_proto);
+ continue;
+ }
+
if (strcmp(shareopts, "on") == 0)
error = fsshare(ZFS_EXPORTS_PATH, mountpoint, "");
else
@@ -832,6 +823,7 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
zfs_get_name(zhp));
return (-1);
}
+
}
return (0);
}
@@ -862,23 +854,58 @@ static int
unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
zfs_share_proto_t proto)
{
+#ifdef sun
+ sa_share_t share;
+ int err;
+ char *mntpt;
+ /*
+ * Mountpoint could get trashed if libshare calls getmntany
+ * which it does during API initialization, so strdup the
+ * value.
+ */
+ mntpt = zfs_strdup(hdl, mountpoint);
+
+ /* make sure libshare initialized */
+ if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
+ free(mntpt); /* don't need the copy anymore */
+ return (zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
+ dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+ name, _sa_errorstr(err)));
+ }
+
+ share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
+ free(mntpt); /* don't need the copy anymore */
+
+ if (share != NULL) {
+ err = zfs_sa_disable_share(share, proto_table[proto].p_name);
+ if (err != SA_OK) {
+ return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+ dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
+ name, _sa_errorstr(err)));
+ }
+ } else {
+ return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
+ dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
+ name));
+ }
+#else
char buf[MAXPATHLEN];
FILE *fp;
- int error;
+ int err;
if (proto != PROTO_NFS) {
fprintf(stderr, "No SMB support in FreeBSD yet.\n");
return (EOPNOTSUPP);
}
- error = fsunshare(ZFS_EXPORTS_PATH, mountpoint);
- if (error != 0) {
- zfs_error_aux(hdl, "%s", strerror(error));
+ err = fsunshare(ZFS_EXPORTS_PATH, mountpoint);
+ if (err != 0) {
+ zfs_error_aux(hdl, "%s", strerror(err));
return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED,
dgettext(TEXT_DOMAIN,
"cannot unshare '%s'"), name));
}
-
+#endif
return (0);
}
@@ -1011,99 +1038,29 @@ remove_mountpoint(zfs_handle_t *zhp)
}
}
-boolean_t
-zfs_is_shared_iscsi(zfs_handle_t *zhp)
-{
-
- /*
- * If iscsi deamon isn't running then we aren't shared
- */
- if (iscsitgt_svc_online && iscsitgt_svc_online() == 1)
- return (B_FALSE);
- else
- return (iscsitgt_zfs_is_shared != NULL &&
- iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
-}
-
-int
-zfs_share_iscsi(zfs_handle_t *zhp)
-{
- char shareopts[ZFS_MAXPROPLEN];
- const char *dataset = zhp->zfs_name;
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
- /*
- * Return success if there are no share options.
- */
- if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
- sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
- strcmp(shareopts, "off") == 0)
- return (0);
-
-/* We don't support iSCSI on FreeBSD yet. */
-#ifdef TODO
- if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0) {
- int error = EZFS_SHAREISCSIFAILED;
-
- /*
- * If service isn't availabele and EPERM was
- * returned then use special error.
- */
- if (iscsitgt_svc_online && errno == EPERM &&
- (iscsitgt_svc_online() != 0))
- error = EZFS_ISCSISVCUNAVAIL;
-
- return (zfs_error_fmt(hdl, error,
- dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
- }
-#endif
-
- return (0);
-}
-
-int
-zfs_unshare_iscsi(zfs_handle_t *zhp)
+void
+libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
{
- const char *dataset = zfs_get_name(zhp);
- libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-/* We don't support iSCSI on FreeBSD yet. */
-#ifdef TODO
- /*
- * Return if the volume is not shared
- */
- if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
- return (0);
+ if (cbp->cb_alloc == cbp->cb_used) {
+ size_t newsz;
+ void *ptr;
- /*
- * If this fails with ENODEV it indicates that zvol wasn't shared so
- * we should return success in that case.
- */
- if (iscsitgt_zfs_unshare == NULL ||
- (iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV)) {
- if (errno == EPERM)
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "Insufficient privileges to unshare iscsi"));
- return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
- dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
+ newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64;
+ ptr = zfs_realloc(zhp->zfs_hdl,
+ cbp->cb_handles, cbp->cb_alloc * sizeof (void *),
+ newsz * sizeof (void *));
+ cbp->cb_handles = ptr;
+ cbp->cb_alloc = newsz;
}
-#endif
-
- return (0);
+ cbp->cb_handles[cbp->cb_used++] = zhp;
}
-typedef struct mount_cbdata {
- zfs_handle_t **cb_datasets;
- int cb_used;
- int cb_alloc;
-} mount_cbdata_t;
-
static int
mount_cb(zfs_handle_t *zhp, void *data)
{
- mount_cbdata_t *cbp = data;
+ get_all_cb_t *cbp = data;
- if (!(zfs_get_type(zhp) & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME))) {
+ if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
zfs_close(zhp);
return (0);
}
@@ -1113,25 +1070,16 @@ mount_cb(zfs_handle_t *zhp, void *data)
return (0);
}
- if (cbp->cb_alloc == cbp->cb_used) {
- void *ptr;
-
- if ((ptr = zfs_realloc(zhp->zfs_hdl,
- cbp->cb_datasets, cbp->cb_alloc * sizeof (void *),
- cbp->cb_alloc * 2 * sizeof (void *))) == NULL)
- return (-1);
- cbp->cb_datasets = ptr;
-
- cbp->cb_alloc *= 2;
+ libzfs_add_handle(cbp, zhp);
+ if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) {
+ zfs_close(zhp);
+ return (-1);
}
-
- cbp->cb_datasets[cbp->cb_used++] = zhp;
-
- return (zfs_iter_filesystems(zhp, mount_cb, cbp));
+ return (0);
}
-static int
-dataset_cmp(const void *a, const void *b)
+int
+libzfs_dataset_cmp(const void *a, const void *b)
{
zfs_handle_t **za = (zfs_handle_t **)a;
zfs_handle_t **zb = (zfs_handle_t **)b;
@@ -1169,7 +1117,7 @@ dataset_cmp(const void *a, const void *b)
int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
{
- mount_cbdata_t cb = { 0 };
+ get_all_cb_t cb = { 0 };
libzfs_handle_t *hdl = zhp->zpool_hdl;
zfs_handle_t *zfsp;
int i, ret = -1;
@@ -1178,23 +1126,17 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
/*
* Gather all non-snap datasets within the pool.
*/
- if ((cb.cb_datasets = zfs_alloc(hdl, 4 * sizeof (void *))) == NULL)
- return (-1);
- cb.cb_alloc = 4;
-
if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
goto out;
- cb.cb_datasets[0] = zfsp;
- cb.cb_used = 1;
-
+ libzfs_add_handle(&cb, zfsp);
if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
goto out;
-
/*
* Sort the datasets by mountpoint.
*/
- qsort(cb.cb_datasets, cb.cb_used, sizeof (void *), dataset_cmp);
+ qsort(cb.cb_handles, cb.cb_used, sizeof (void *),
+ libzfs_dataset_cmp);
/*
* And mount all the datasets, keeping track of which ones
@@ -1206,7 +1148,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
ret = 0;
for (i = 0; i < cb.cb_used; i++) {
- if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0)
+ if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
ret = -1;
else
good[i] = 1;
@@ -1219,7 +1161,7 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
* zfs_alloc is supposed to exit if memory isn't available.
*/
for (i = 0; i < cb.cb_used; i++) {
- if (good[i] && zfs_share(cb.cb_datasets[i]) != 0)
+ if (good[i] && zfs_share(cb.cb_handles[i]) != 0)
ret = -1;
}
@@ -1227,34 +1169,12 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
out:
for (i = 0; i < cb.cb_used; i++)
- zfs_close(cb.cb_datasets[i]);
- free(cb.cb_datasets);
+ zfs_close(cb.cb_handles[i]);
+ free(cb.cb_handles);
return (ret);
}
-
-static int
-zvol_cb(const char *dataset, void *data)
-{
- libzfs_handle_t *hdl = data;
- zfs_handle_t *zhp;
-
- /*
- * Ignore snapshots and ignore failures from non-existant datasets.
- */
- if (strchr(dataset, '@') != NULL ||
- (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
- return (0);
-
- if (zfs_unshare_iscsi(zhp) != 0)
- return (-1);
-
- zfs_close(zhp);
-
- return (0);
-}
-
static int
mountpoint_compare(const void *a, const void *b)
{
@@ -1264,6 +1184,8 @@ mountpoint_compare(const void *a, const void *b)
return (strcmp(mountb, mounta));
}
+/* alias for 2002/240 */
+#pragma weak zpool_unmount_datasets = zpool_disable_datasets
/*
* Unshare and unmount all datasets within the given pool. We don't want to
* rely on traversing the DSL to discover the filesystems within the pool,
@@ -1271,46 +1193,38 @@ mountpoint_compare(const void *a, const void *b)
* arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
* gather all the filesystems that are currently mounted.
*/
-#pragma weak zpool_unmount_datasets = zpool_disable_datasets
int
zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
{
int used, alloc;
- struct statfs *sfs;
+ struct mnttab entry;
size_t namelen;
char **mountpoints = NULL;
zfs_handle_t **datasets = NULL;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- int i, j, n;
+ int i;
int ret = -1;
int flags = (force ? MS_FORCE : 0);
- /*
- * First unshare all zvols.
- */
- if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
- return (-1);
-
namelen = strlen(zhp->zpool_name);
+ rewind(hdl->libzfs_mnttab);
used = alloc = 0;
- if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) {
- fprintf(stderr, "getmntinfo(): %s\n", strerror(errno));
- return (-1);
- }
- for (j = 0; j < n; j++) {
+ while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
/*
* Ignore non-ZFS entries.
*/
- if (strcmp(sfs[j].f_fstypename, MNTTYPE_ZFS) != 0)
+ if (entry.mnt_fstype == NULL ||
+ strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
continue;
/*
* Ignore filesystems not within this pool.
*/
- if (strncmp(sfs[j].f_mntfromname, zhp->zpool_name, namelen) != 0 ||
- (sfs[j].f_mntfromname[namelen] != '/' &&
- sfs[j].f_mntfromname[namelen] != '\0'))
+ if (entry.mnt_mountp == NULL ||
+ strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
+ (entry.mnt_special[namelen] != '/' &&
+ entry.mnt_special[namelen] != '\0'))
continue;
/*
@@ -1348,7 +1262,7 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
}
if ((mountpoints[used] = zfs_strdup(hdl,
- sfs[j].f_mntonname)) == NULL)
+ entry.mnt_mountp)) == NULL)
goto out;
/*
@@ -1356,7 +1270,7 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
* is only used to determine if we need to remove the underlying
* mountpoint, so failure is not fatal.
*/
- datasets[used] = make_dataset_handle(hdl, sfs[j].f_mntfromname);
+ datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
used++;
}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
index c7edd2e..c2306ec 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
@@ -20,41 +20,38 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
#include <sys/stat.h>
-#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <devid.h>
-#include <dirent.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
-#include <zone.h>
#include <sys/zfs_ioctl.h>
-#include <sys/zio.h>
-#include <umem.h>
+#include <dlfcn.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"
+#include "zfs_comutil.h"
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
-#ifdef sun
-#if defined(__i386) || defined(__amd64)
-#define BOOTCMD "installgrub(1M)"
-#else
-#define BOOTCMD "installboot(1M)"
-#endif
-#endif /* sun */
+#define DISK_ROOT "/dev/dsk"
+#define RDISK_ROOT "/dev/rdsk"
+#define BACKUP_SLICE "s2"
+
+/*
+ * Describes the context in which a property list is being validated.
+ *
+ * Both members are one-bit flags.  They are declared unsigned because a
+ * plain 'int' bit-field may be signed, and a signed one-bit field can only
+ * represent 0 and -1, which makes storing a true value of 1 in it
+ * implementation-defined.
+ */
+typedef struct prop_flags {
+	unsigned int create:1;	/* Validate property on creation */
+	unsigned int import:1;	/* Validate property on import */
+} prop_flags_t;
/*
* ====================================================================
@@ -189,6 +186,8 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
case VDEV_STATE_CANT_OPEN:
if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
return (gettext("FAULTED"));
+ else if (aux == VDEV_AUX_SPLIT_POOL)
+ return (gettext("SPLIT"));
else
return (gettext("UNAVAIL"));
case VDEV_STATE_FAULTED:
@@ -269,8 +268,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
switch (prop) {
case ZPOOL_PROP_SIZE:
- case ZPOOL_PROP_USED:
- case ZPOOL_PROP_AVAILABLE:
+ case ZPOOL_PROP_ALLOCATED:
+ case ZPOOL_PROP_FREE:
(void) zfs_nicenum(intval, buf, len);
break;
@@ -279,11 +278,18 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
(u_longlong_t)intval);
break;
+ case ZPOOL_PROP_DEDUPRATIO:
+ (void) snprintf(buf, len, "%llu.%02llux",
+ (u_longlong_t)(intval / 100),
+ (u_longlong_t)(intval % 100));
+ break;
+
case ZPOOL_PROP_HEALTH:
verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
verify(nvlist_lookup_uint64_array(nvroot,
- ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
+ == 0);
(void) strlcpy(buf, zpool_state_to_name(intval,
vs->vs_aux), len);
@@ -311,17 +317,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
return (0);
}
-static boolean_t
-pool_is_bootable(zpool_handle_t *zhp)
-{
- char bootfs[ZPOOL_MAXNAMELEN];
-
- return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
- sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
- sizeof (bootfs)) != 0);
-}
-
-
/*
* Check if the bootfs name has the same pool name as it is set to.
* Assuming bootfs is a valid dataset name.
@@ -364,6 +359,17 @@ pool_uses_efi(nvlist_t *config)
return (B_FALSE);
}
+/*
+ * Report whether the pool has a boot filesystem configured: the bootfs
+ * property must be readable and must hold something other than "-".
+ */
+static boolean_t
+pool_is_bootable(zpool_handle_t *zhp)
+{
+	char bootfs[ZPOOL_MAXNAMELEN];
+
+	/* A failed property lookup means the pool is not bootable. */
+	if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
+	    sizeof (bootfs), NULL) != 0)
+		return (B_FALSE);
+
+	return (strncmp(bootfs, "-", sizeof (bootfs)) != 0);
+}
+
+
/*
* Given an nvlist of zpool properties to be set, validate that they are
* correct, and parse any numeric properties (index, boolean, etc) if they are
@@ -371,7 +377,7 @@ pool_uses_efi(nvlist_t *config)
*/
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
- nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
+ nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
nvpair_t *elem;
nvlist_t *retprops;
@@ -428,7 +434,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
break;
case ZPOOL_PROP_BOOTFS:
- if (create_or_import) {
+ if (flags.create || flags.import) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"property '%s' cannot be set at creation "
"or import time"), propname);
@@ -465,7 +471,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-#if defined(sun)
+#ifdef sun
/*
* bootfs property cannot be set on a disk which has
* been EFI labeled.
@@ -478,12 +484,12 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
zpool_close(zhp);
goto error;
}
-#endif
+#endif /* sun */
zpool_close(zhp);
break;
case ZPOOL_PROP_ALTROOT:
- if (!create_or_import) {
+ if (!flags.create && !flags.import) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"property '%s' can only be set during pool "
"creation or import"), propname);
@@ -538,6 +544,16 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
*slash = '/';
break;
+
+ case ZPOOL_PROP_READONLY:
+ if (!flags.import) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "property '%s' can only be set at "
+ "import time"), propname);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
+ break;
}
}
@@ -559,6 +575,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
nvlist_t *nvl = NULL;
nvlist_t *realprops;
uint64_t version;
+ prop_flags_t flags = { 0 };
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
@@ -574,7 +591,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
- zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
+ zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
nvlist_free(nvl);
return (-1);
}
@@ -633,6 +650,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
/*
+ * Don't start the slice at the default block of 34; many storage
+ * devices will use a stripe width of 128k, so start there instead.
+ */
+#define NEW_START_BLOCK 256
+
+/*
* Validate the given pool name, optionally putting an extended error message in
* 'buf'.
*/
@@ -875,8 +898,10 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
return (-1);
if (props) {
+ prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
+
if ((zc_props = zpool_valid_proplist(hdl, pool, props,
- SPA_VERSION_1, B_TRUE, msg)) == NULL) {
+ SPA_VERSION_1, flags, msg)) == NULL) {
goto create_failed;
}
}
@@ -994,16 +1019,12 @@ zpool_destroy(zpool_handle_t *zhp)
char msg[1024];
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
- (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
- ZFS_TYPE_FILESYSTEM)) == NULL)
- return (-1);
-
- if (zpool_remove_zvol_links(zhp) != 0)
+ (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+ if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot destroy '%s'"), zhp->zpool_name);
@@ -1066,7 +1087,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"device '%s' contains an EFI label and "
"cannot be used on root pools."),
- zpool_vdev_name(hdl, NULL, spares[s]));
+ zpool_vdev_name(hdl, NULL, spares[s],
+ B_FALSE));
return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
}
}
@@ -1085,7 +1107,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
switch (errno) {
case EBUSY:
/*
@@ -1161,9 +1183,6 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
zfs_cmd_t zc = { 0 };
char msg[1024];
- if (zpool_remove_zvol_links(zhp) != 0)
- return (-1);
-
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot export '%s'"), zhp->zpool_name);
@@ -1202,6 +1221,132 @@ zpool_export_force(zpool_handle_t *zhp)
return (zpool_export_common(zhp, B_TRUE, B_TRUE));
}
+/*
+ * Tell the user what a pool rewind did, or, when 'dryrun' is set, what it
+ * would do, based on the load information found in 'config'.
+ *
+ * Nothing is printed when error printing is disabled on the handle, when
+ * 'config' is NULL, or when 'config' has no ZPOOL_CONFIG_LOAD_INFO list or
+ * that list has no ZPOOL_CONFIG_LOAD_TIME entry.  ZPOOL_CONFIG_REWIND_TIME
+ * is optional; a positive value is reported as the amount of discarded
+ * transactions, in seconds up to 120 and rounded to minutes beyond that.
+ */
+static void
+zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
+    nvlist_t *config)
+{
+	nvlist_t *nv = NULL;
+	uint64_t rewindto;
+	int64_t loss = -1;
+	time_t rewindtime;
+	struct tm t;
+	char timestr[128];
+
+	if (!hdl->libzfs_printerr || config == NULL)
+		return;
+
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
+		return;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+		return;
+	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
+
+	/*
+	 * Convert by value: time_t is not guaranteed to be 64 bits wide, so
+	 * reading 'rewindto' through a (time_t *) cast is not portable.
+	 */
+	rewindtime = (time_t)rewindto;
+
+	/*
+	 * Ask for "%c" explicitly; a null format pointer is accepted only as
+	 * an extension by some C libraries.
+	 */
+	if (localtime_r(&rewindtime, &t) != NULL &&
+	    strftime(timestr, sizeof (timestr), "%c", &t) != 0) {
+		if (dryrun) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Would be able to return %s "
+			    "to its state as of %s.\n"),
+			    name, timestr);
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "Pool %s returned to its state as of %s.\n"),
+			    name, timestr);
+		}
+		/* %lld takes a long long, which int64_t need not be. */
+		if (loss > 120) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s approximately %lld "),
+			    dryrun ? "Would discard" : "Discarded",
+			    (long long)((loss + 30) / 60));
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "minutes of transactions.\n"));
+		} else if (loss > 0) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "%s approximately %lld "),
+			    dryrun ? "Would discard" : "Discarded",
+			    (long long)loss);
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "seconds of transactions.\n"));
+		}
+	}
+}
+
+/*
+ * Print advice on recovering pool 'name' by rewinding it, using the load
+ * information in 'config'.
+ *
+ * 'reason' selects the presentation: a non-negative value prefixes the text
+ * with "action: " and recommends 'zpool clear -F'; a negative value prefixes
+ * it with a tab and recommends 'zpool import -F'.  When 'config' carries no
+ * ZPOOL_CONFIG_LOAD_INFO list, or that list has no ZPOOL_CONFIG_LOAD_TIME
+ * entry, the user is told to destroy and re-create the pool instead.
+ * Nothing is printed when error printing is disabled on the handle.
+ */
+void
+zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
+    nvlist_t *config)
+{
+	nvlist_t *nv = NULL;
+	int64_t loss = -1;
+	uint64_t edata = UINT64_MAX;
+	uint64_t rewindto;
+	time_t rewindtime;
+	struct tm t;
+	char timestr[128];
+
+	if (!hdl->libzfs_printerr)
+		return;
+
+	if (reason >= 0)
+		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
+	else
+		(void) printf(dgettext(TEXT_DOMAIN, "\t"));
+
+	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
+	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+		goto no_info;
+
+	/* Both entries are optional; the initial values mean "unknown". */
+	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
+	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
+	    &edata);
+
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Recovery is possible, but will result in some data loss.\n"));
+
+	/*
+	 * Convert by value: time_t is not guaranteed to be 64 bits wide, so
+	 * reading 'rewindto' through a (time_t *) cast is not portable.
+	 */
+	rewindtime = (time_t)rewindto;
+
+	/*
+	 * Ask for "%c" explicitly; a null format pointer is accepted only as
+	 * an extension by some C libraries.
+	 */
+	if (localtime_r(&rewindtime, &t) != NULL &&
+	    strftime(timestr, sizeof (timestr), "%c", &t) != 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\tReturning the pool to its state as of %s\n"
+		    "\tshould correct the problem.  "),
+		    timestr);
+	} else {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "\tReverting the pool to an earlier state "
+		    "should correct the problem.\n\t"));
+	}
+
+	/* %lld takes a long long, which int64_t need not be. */
+	if (loss > 120) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Approximately %lld minutes of data\n"
+		    "\tmust be discarded, irreversibly.  "),
+		    (long long)((loss + 30) / 60));
+	} else if (loss > 0) {
+		(void) printf(dgettext(TEXT_DOMAIN,
+		    "Approximately %lld seconds of data\n"
+		    "\tmust be discarded, irreversibly.  "),
+		    (long long)loss);
+	}
+	if (edata != 0 && edata != UINT64_MAX) {
+		if (edata == 1) {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "After rewind, at least\n"
+			    "\tone persistent user-data error will remain.  "));
+		} else {
+			(void) printf(dgettext(TEXT_DOMAIN,
+			    "After rewind, several\n"
+			    "\tpersistent user-data errors will remain.  "));
+		}
+	}
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
+	    reason >= 0 ? "clear" : "import", name);
+
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "A scrub of the pool\n"
+	    "\tis strongly recommended after recovery.\n"));
+	return;
+
+no_info:
+	(void) printf(dgettext(TEXT_DOMAIN,
+	    "Destroy and re-create the pool from\n\ta backup source.\n"));
+}
+
/*
* zpool_import() is a contracted interface. Should be kept the same
* if possible.
@@ -1234,12 +1379,40 @@ zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
}
}
- ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
+ ret = zpool_import_props(hdl, config, newname, props,
+ ZFS_IMPORT_NORMAL);
if (props)
nvlist_free(props);
return (ret);
}
+/*
+ * Print one vdev and, recursively, its children.  'name' is printed (when
+ * it is not NULL) after a tab and 'indent' columns of padding, with " [log]"
+ * appended if the vdev carries a non-zero ZPOOL_CONFIG_IS_LOG value.  Each
+ * child is printed with the indent increased by two.
+ */
+static void
+print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
+    int indent)
+{
+	nvlist_t **kids;
+	uint_t nkids, i;
+	uint64_t islog = 0;
+
+	/* The lookup result is deliberately ignored; islog starts at 0. */
+	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog);
+
+	if (name != NULL) {
+		(void) printf("\t%*s%s%s\n", indent, "", name,
+		    islog ? " [log]" : "");
+	}
+
+	/* A vdev without a children array is a leaf: nothing more to do. */
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return;
+
+	for (i = 0; i < nkids; i++) {
+		/* The name is freed here after use. */
+		char *vname = zpool_vdev_name(hdl, NULL, kids[i], B_TRUE);
+
+		print_vdev_tree(hdl, vname, kids[i], indent + 2);
+		free(vname);
+	}
+}
+
/*
* Import the given pool using the known configuration and a list of
* properties to be set. The configuration should have come from
@@ -1248,12 +1421,17 @@ zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
*/
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
- nvlist_t *props, boolean_t importfaulted)
+ nvlist_t *props, int flags)
{
zfs_cmd_t zc = { 0 };
+ zpool_rewind_policy_t policy;
+ nvlist_t *nv = NULL;
+ nvlist_t *nvinfo = NULL;
+ nvlist_t *missing = NULL;
char *thename;
char *origname;
int ret;
+ int error = 0;
char errbuf[1024];
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
@@ -1274,12 +1452,13 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
if (props) {
uint64_t version;
+ prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
&version) == 0);
if ((props = zpool_valid_proplist(hdl, origname,
- props, version, B_TRUE, errbuf)) == NULL) {
+ props, version, flags, errbuf)) == NULL) {
return (-1);
} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
nvlist_free(props);
@@ -1296,11 +1475,39 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
nvlist_free(props);
return (-1);
}
+ if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
+ nvlist_free(props);
+ return (-1);
+ }
- zc.zc_cookie = (uint64_t)importfaulted;
- ret = 0;
- if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+ zc.zc_cookie = flags;
+ while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
+ errno == ENOMEM) {
+ if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+ zcmd_free_nvlists(&zc);
+ return (-1);
+ }
+ }
+ if (ret != 0)
+ error = errno;
+
+ (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
+ zpool_get_rewind_policy(config, &policy);
+
+ if (error) {
char desc[1024];
+
+ /*
+ * Dry-run failed, but we print out what success
+ * looks like if we found a best txg
+ */
+ if (policy.zrp_request & ZPOOL_TRY_REWIND) {
+ zpool_rewind_exclaim(hdl, newname ? origname : thename,
+ B_TRUE, nv);
+ nvlist_free(nv);
+ return (-1);
+ }
+
if (newname == NULL)
(void) snprintf(desc, sizeof (desc),
dgettext(TEXT_DOMAIN, "cannot import '%s'"),
@@ -1310,7 +1517,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
origname, thename);
- switch (errno) {
+ switch (error) {
case ENOTSUP:
/*
* Unsupported version.
@@ -1322,10 +1529,38 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
break;
+ case EROFS:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "one or more devices is read only"));
+ (void) zfs_error(hdl, EZFS_BADDEV, desc);
+ break;
+
+ case ENXIO:
+ if (nv && nvlist_lookup_nvlist(nv,
+ ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
+ nvlist_lookup_nvlist(nvinfo,
+ ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "The devices below are missing, use "
+ "'-m' to import the pool anyway:\n"));
+ print_vdev_tree(hdl, NULL, missing, 2);
+ (void) printf("\n");
+ }
+ (void) zpool_standard_error(hdl, error, desc);
+ break;
+
+ case EEXIST:
+ (void) zpool_standard_error(hdl, error, desc);
+ break;
+
default:
- (void) zpool_standard_error(hdl, errno, desc);
+ (void) zpool_standard_error(hdl, error, desc);
+ zpool_explain_recover(hdl,
+ newname ? origname : thename, -error, nv);
+ break;
}
+ nvlist_free(nv);
ret = -1;
} else {
zpool_handle_t *zhp;
@@ -1333,13 +1568,17 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
/*
* This should never fail, but play it safe anyway.
*/
- if (zpool_open_silent(hdl, thename, &zhp) != 0) {
+ if (zpool_open_silent(hdl, thename, &zhp) != 0)
ret = -1;
- } else if (zhp != NULL) {
- ret = zpool_create_zvol_links(zhp);
+ else if (zhp != NULL)
zpool_close(zhp);
+ if (policy.zrp_request &
+ (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
+ zpool_rewind_exclaim(hdl, newname ? origname : thename,
+ ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
}
-
+ nvlist_free(nv);
+ return (0);
}
zcmd_free_nvlists(&zc);
@@ -1349,71 +1588,235 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
}
/*
- * Scrub the pool.
+ * Scan the pool.
*/
int
-zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- zc.zc_cookie = type;
+ zc.zc_cookie = func;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
+ if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
+ (errno == ENOENT && func != POOL_SCAN_NONE))
return (0);
- (void) snprintf(msg, sizeof (msg),
- dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+ if (func == POOL_SCAN_SCRUB) {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+ } else if (func == POOL_SCAN_NONE) {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
+ zc.zc_name);
+ } else {
+ assert(!"unexpected result");
+ }
- if (errno == EBUSY)
- return (zfs_error(hdl, EZFS_RESILVERING, msg));
- else
+ if (errno == EBUSY) {
+ nvlist_t *nvroot;
+ pool_scan_stat_t *ps = NULL;
+ uint_t psc;
+
+ verify(nvlist_lookup_nvlist(zhp->zpool_config,
+ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+ (void) nvlist_lookup_uint64_array(nvroot,
+ ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
+ if (ps && ps->pss_func == POOL_SCAN_SCRUB)
+ return (zfs_error(hdl, EZFS_SCRUBBING, msg));
+ else
+ return (zfs_error(hdl, EZFS_RESILVERING, msg));
+ } else if (errno == ENOENT) {
+ return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
+ } else {
return (zpool_standard_error(hdl, errno, msg));
+ }
+}
+
+/*
+ * This provides a very minimal check whether a given string is likely a
+ * c#t#d# style string.  Users of this are expected to do their own
+ * verification of the s# part.
+ *
+ * The argument is parenthesized so that an expression such as 'p + 1' can be
+ * passed safely, but it is still evaluated more than once and so must be
+ * free of side effects.  The character is converted to unsigned char before
+ * it reaches isdigit(), whose behavior is undefined for negative values
+ * other than EOF.
+ */
+#define	CTD_CHECK(str)	((str) != NULL && (str)[0] == 'c' && \
+	    isdigit((unsigned char)(str)[1]))
+
+/*
+ * Path-aware variant of CTD_CHECK() for strings which may start with
+ * "/dev/dsk/" and the like.  When the string begins with a slash only its
+ * last component is checked, or the component before that if the string
+ * ends in "/old".  Any other string is checked as it stands.
+ */
+static int
+ctd_check_path(char *str)
+{
+	char *slash;
+
+	/* Not an absolute path (or NULL): check the string directly. */
+	if (str == NULL || str[0] != '/')
+		return (CTD_CHECK(str));
+
+	slash = strrchr(str, '/');
+
+	/*
+	 * Skip a trailing "/old" by walking back to the slash that starts
+	 * the preceding component.  The walk always stops, because the
+	 * string is known to begin with a slash.
+	 */
+	if (slash != str && strcmp(slash, "/old") == 0) {
+		do {
+			slash--;
+		} while (*slash != '/');
+	}
+
+	/* Hand the macro a plain variable, never an expression. */
+	str = slash + 1;
+	return (CTD_CHECK(str));
+}
/*
+ * Find a vdev that matches the search criteria specified. We use the
+ * nvpair name to determine how we should look for the device.
* 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
* spare; but FALSE if its an INUSE spare.
*/
static nvlist_t *
-vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
- boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
+ boolean_t *l2cache, boolean_t *log)
{
uint_t c, children;
nvlist_t **child;
- uint64_t theguid, present;
- char *path;
- uint64_t wholedisk = 0;
nvlist_t *ret;
uint64_t is_log;
+ char *srchkey;
+ nvpair_t *pair = nvlist_next_nvpair(search, NULL);
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
+ /* Nothing to look for */
+ if (search == NULL || pair == NULL)
+ return (NULL);
+
+ /* Obtain the key we will use to search */
+ srchkey = nvpair_name(pair);
+
+ switch (nvpair_type(pair)) {
+ case DATA_TYPE_UINT64:
+ if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
+ uint64_t srchval, theguid;
+
+ verify(nvpair_value_uint64(pair, &srchval) == 0);
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+ &theguid) == 0);
+ if (theguid == srchval)
+ return (nv);
+ }
+ break;
+
+ case DATA_TYPE_STRING: {
+ char *srchval, *val;
+
+ verify(nvpair_value_string(pair, &srchval) == 0);
+ if (nvlist_lookup_string(nv, srchkey, &val) != 0)
+ break;
- if (search == NULL &&
- nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
/*
- * If the device has never been present since import, the only
- * reliable way to match the vdev is by GUID.
+ * Search for the requested value. Special cases:
+ *
+ * - ZPOOL_CONFIG_PATH for whole disk entries. These end in
+ * "s0" or "s0/old". The "s0" part is hidden from the user,
+ * but included in the string, so this matches around it.
+ * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
+ *
+ * Otherwise, all other searches are simple string compares.
*/
- if (theguid == guid)
- return (nv);
- } else if (search != NULL &&
- nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk);
- if (wholedisk) {
+ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
+ ctd_check_path(val)) {
+ uint64_t wholedisk = 0;
+
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk);
+ if (wholedisk) {
+ int slen = strlen(srchval);
+ int vlen = strlen(val);
+
+ if (slen != vlen - 2)
+ break;
+
+ /*
+ * make_leaf_vdev() should only set
+ * wholedisk for ZPOOL_CONFIG_PATHs which
+ * will include "/dev/dsk/", giving plenty of
+ * room for the indices used next.
+ */
+ ASSERT(vlen >= 6);
+
+ /*
+ * strings identical except trailing "s0"
+ */
+ if (strcmp(&val[vlen - 2], "s0") == 0 &&
+ strncmp(srchval, val, slen) == 0)
+ return (nv);
+
+ /*
+ * strings identical except trailing "s0/old"
+ */
+ if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
+ strcmp(&srchval[slen - 4], "/old") == 0 &&
+ strncmp(srchval, val, slen - 4) == 0)
+ return (nv);
+
+ break;
+ }
+ } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
+ char *type, *idx, *end, *p;
+ uint64_t id, vdev_id;
+
+ /*
+ * Determine our vdev type, keeping in mind
+ * that the srchval is composed of a type and
+ * vdev id pair (i.e. mirror-4).
+ */
+ if ((type = strdup(srchval)) == NULL)
+ return (NULL);
+
+ if ((p = strrchr(type, '-')) == NULL) {
+ free(type);
+ break;
+ }
+ idx = p + 1;
+ *p = '\0';
+
+ /*
+ * If the types don't match then keep looking.
+ */
+ if (strncmp(val, type, strlen(val)) != 0) {
+ free(type);
+ break;
+ }
+
+ verify(strncmp(type, VDEV_TYPE_RAIDZ,
+ strlen(VDEV_TYPE_RAIDZ)) == 0 ||
+ strncmp(type, VDEV_TYPE_MIRROR,
+ strlen(VDEV_TYPE_MIRROR)) == 0);
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+ &id) == 0);
+
+ errno = 0;
+ vdev_id = strtoull(idx, &end, 10);
+
+ free(type);
+ if (errno != 0)
+ return (NULL);
+
/*
- * For whole disks, the internal path has 's0', but the
- * path passed in by the user doesn't.
+ * Now verify that we have the correct vdev id.
*/
- if (strlen(search) == strlen(path) - 2 &&
- strncmp(search, path, strlen(search)) == 0)
+ if (vdev_id == id)
return (nv);
- } else if (strcmp(search, path) == 0) {
- return (nv);
}
+
+ /*
+ * Common case
+ */
+ if (strcmp(srchval, val) == 0)
+ return (nv);
+ break;
+ }
+
+ default:
+ break;
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
@@ -1421,7 +1824,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
/*
* The 'is_log' value is only set for the toplevel
@@ -1442,7 +1845,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*avail_spare = B_TRUE;
return (ret);
@@ -1453,7 +1856,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*l2cache = B_TRUE;
return (ret);
@@ -1464,24 +1867,65 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
return (NULL);
}
+/*
+ * Given a physical path (minus the "/devices" prefix), find the
+ * associated vdev.
+ */
+nvlist_t *
+zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
+    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+{
+	nvlist_t *criteria, *vdev_tree, *match;
+
+	/* Build a single-entry search list keyed on the physical path. */
+	verify(nvlist_alloc(&criteria, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	verify(nvlist_add_string(criteria, ZPOOL_CONFIG_PHYS_PATH,
+	    ppath) == 0);
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &vdev_tree) == 0);
+
+	/*
+	 * Clear the out-parameters before the walk; 'log' is optional and
+	 * may be NULL, the other two must point to valid storage.
+	 */
+	*avail_spare = B_FALSE;
+	*l2cache = B_FALSE;
+	if (log != NULL)
+		*log = B_FALSE;
+
+	/*
+	 * The result (if any) is a node inside the pool's own config tree,
+	 * so only the temporary search list is released here.
+	 */
+	match = vdev_to_nvlist_iter(vdev_tree, criteria, avail_spare,
+	    l2cache, log);
+	nvlist_free(criteria);
+
+	return (match);
+}
+
+/*
+ * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
+ */
+boolean_t
+zpool_vdev_is_interior(const char *name)
+{
+	int is_raidz, is_mirror;
+
+	/*
+	 * Only the leading characters are compared, so names that carry a
+	 * suffix after the type (for example "mirror-4") also match.
+	 */
+	is_raidz = (strncmp(name, VDEV_TYPE_RAIDZ,
+	    strlen(VDEV_TYPE_RAIDZ)) == 0);
+	is_mirror = (strncmp(name, VDEV_TYPE_MIRROR,
+	    strlen(VDEV_TYPE_MIRROR)) == 0);
+
+	return ((is_raidz || is_mirror) ? B_TRUE : B_FALSE);
+}
+
nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
boolean_t *l2cache, boolean_t *log)
{
char buf[MAXPATHLEN];
- const char *search;
char *end;
- nvlist_t *nvroot;
+ nvlist_t *nvroot, *search, *ret;
uint64_t guid;
+ verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
guid = strtoull(path, &end, 10);
if (guid != 0 && *end == '\0') {
- search = NULL;
+ verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
+ } else if (zpool_vdev_is_interior(path)) {
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
} else if (path[0] != '/') {
(void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
- search = buf;
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
} else {
- search = path;
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
}
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
@@ -1491,8 +1935,10 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
*l2cache = B_FALSE;
if (log != NULL)
*log = B_FALSE;
- return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
- l2cache, log));
+ ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
+ nvlist_free(search);
+
+ return (ret);
}
static int
@@ -1509,106 +1955,180 @@ vdev_online(nvlist_t *nv)
}
/*
- * Get phys_path for a root pool
- * Return 0 on success; non-zeron on failure.
+ * Helper function for zpool_get_physpaths().
*/
-int
-zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
+static int
+vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
+    size_t *bytes_written)
 {
+	size_t avail, offset, needed;
+	char *devpath;
+
+	/* A vdev without a recorded physical path contributes nothing. */
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
+	    &devpath) != 0)
+		return (EZFS_NODEVICE);
+
+	offset = *bytes_written;
+	avail = physpath_size - offset;
+
+	/* Every entry after the first is preceded by a single space. */
+	if (offset == 0)
+		needed = snprintf(physpath + offset, avail, "%s", devpath);
+	else
+		needed = snprintf(physpath + offset, avail, " %s", devpath);
+
+	/*
+	 * The running total is advanced by the length snprintf() reported,
+	 * even when the output was truncated.
+	 */
+	*bytes_written += needed;
+
+	if (needed < avail)
+		return (0);
+
+	/* Truncated: drop the partial entry so the buffer stays clean. */
+	if (avail != 0)
+		physpath[offset] = 0;
+	return (EZFS_NOSPC);
+}
+
+/*
+ * Recursively append the physical paths of the online disks below 'nv'
+ * to 'physpath' (a buffer of 'phypath_size' bytes).  '*rsz' is the running
+ * byte count maintained by vdev_get_one_physpath().  'is_spare' is true
+ * while descending through a spare vdev, in which case only children with
+ * ZPOOL_CONFIG_IS_SPARE set are considered.
+ *
+ * Returns EZFS_INVALCONFIG for a malformed or filtered-out node, or the
+ * error from vdev_get_one_physpath() for a disk.
+ *
+ * NOTE(review): every path that does not return early ends in
+ * EZFS_POOL_INVALARG, including the case where paths were appended
+ * successfully.  The caller visible in this file discards the return value
+ * and inspects '*rsz' instead, so that behaviour is kept as is.
+ */
+static int
+vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
+    size_t *rsz, boolean_t is_spare)
+{
+	char *type;
+	int ret;
+
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
+		return (EZFS_INVALCONFIG);
+
+	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+		/*
+		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
+		 * For a spare vdev, we only want to boot from the active
+		 * spare device.
+		 */
+		if (is_spare) {
+			uint64_t spare = 0;
+			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
+			    &spare);
+			if (!spare)
+				return (EZFS_INVALCONFIG);
+		}
+
+		if (vdev_online(nv)) {
+			if ((ret = vdev_get_one_physpath(nv, physpath,
+			    phypath_size, rsz)) != 0)
+				return (ret);
+		}
+	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
+	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
+	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
+		/*
+		 * Because of short-circuit evaluation, 'is_spare' is only
+		 * reassigned when the type is neither mirror nor replacing;
+		 * for those two the caller's value is passed down unchanged.
+		 */
+		nvlist_t **child;
+		uint_t count, i;
+
+		if (nvlist_lookup_nvlist_array(nv,
+		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
+			return (EZFS_INVALCONFIG);
+
+		/* Only running out of buffer space stops the walk. */
+		for (i = 0; i < count; i++) {
+			ret = vdev_get_physpaths(child[i], physpath,
+			    phypath_size, rsz, is_spare);
+			if (ret == EZFS_NOSPC)
+				return (ret);
+		}
+	}
+
+	return (EZFS_POOL_INVALARG);
+}
+
+/*
+ * Get phys_path for a root pool config.
+ * Return 0 on success; non-zero on failure.
+ */
+static int
+zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
+{
+ size_t rsz;
nvlist_t *vdev_root;
nvlist_t **child;
uint_t count;
- int i;
+ char *type;
- /*
- * Make sure this is a root pool, as phys_path doesn't mean
- * anything to a non-root pool.
- */
- if (!pool_is_bootable(zhp))
- return (-1);
+ rsz = 0;
- verify(nvlist_lookup_nvlist(zhp->zpool_config,
- ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &vdev_root) != 0)
+ return (EZFS_INVALCONFIG);
- if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
+ if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
+ nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
&child, &count) != 0)
- return (-2);
+ return (EZFS_INVALCONFIG);
- for (i = 0; i < count; i++) {
- nvlist_t **child2;
- uint_t count2;
- char *type;
- char *tmppath;
- int j;
+ /*
+ * root pool can not have EFI labeled disks and can only have
+ * a single top-level vdev.
+ */
+ if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
+ pool_uses_efi(vdev_root))
+ return (EZFS_POOL_INVALARG);
- if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
- != 0)
- return (-3);
-
- if (strcmp(type, VDEV_TYPE_DISK) == 0) {
- if (!vdev_online(child[i]))
- return (-8);
- verify(nvlist_lookup_string(child[i],
- ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
- (void) strncpy(physpath, tmppath, strlen(tmppath));
- } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
- if (nvlist_lookup_nvlist_array(child[i],
- ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
- return (-4);
-
- for (j = 0; j < count2; j++) {
- if (!vdev_online(child2[j]))
- return (-8);
- if (nvlist_lookup_string(child2[j],
- ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
- return (-5);
-
- if ((strlen(physpath) + strlen(tmppath)) >
- MAXNAMELEN)
- return (-6);
-
- if (strlen(physpath) == 0) {
- (void) strncpy(physpath, tmppath,
- strlen(tmppath));
- } else {
- (void) strcat(physpath, " ");
- (void) strcat(physpath, tmppath);
- }
- }
- } else {
- return (-7);
- }
- }
+ (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
+ B_FALSE);
+
+ /* No online devices */
+ if (rsz == 0)
+ return (EZFS_NODEVICE);
return (0);
}
/*
- * Returns TRUE if the given guid corresponds to the given type.
- * This is used to check for hot spares (INUSE or not), and level 2 cache
- * devices.
+ * Get phys_path for a root pool
+ * Return 0 on success; non-zero on failure.
*/
-static boolean_t
-is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
+int
+zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
{
- uint64_t target_guid;
- nvlist_t *nvroot;
- nvlist_t **list;
- uint_t count;
- int i;
+ return (zpool_get_config_physpath(zhp->zpool_config, physpath,
+ phypath_size));
+}
- verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
- for (i = 0; i < count; i++) {
- verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
- &target_guid) == 0);
- if (guid == target_guid)
- return (B_TRUE);
- }
+/*
+ * If the device has been dynamically expanded then we need to relabel
+ * the disk to use the new unallocated space.
+ */
+static int
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+{
+#ifdef sun
+	char path[MAXPATHLEN];
+	char errbuf[1024];
+	int fd, error;
+	int (*_efi_use_whole_disk)(int);
+
+	/* efi_use_whole_disk() is resolved at run time; fail if absent. */
+	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
+	    "efi_use_whole_disk")) == NULL)
+		return (-1);
+
+	(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
+
+	/*
+	 * Both failure paths below hand errbuf to zfs_error(), so it must
+	 * hold a valid message before either of them can be taken.
+	 */
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot relabel '%s'"), name);
+
+	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to open device"), name);
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
 }
- return (B_FALSE);
+	/*
+	 * It's possible that we might encounter an error if the device
+	 * does not have any unallocated space left. If so, we simply
+	 * ignore that error and continue on.
+	 */
+	error = _efi_use_whole_disk(fd);
+	(void) close(fd);
+	if (error && error != VT_ENOSPC) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to read disk capacity"), name);
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+#endif /* sun */
+	/* On platforms other than sun this function is a no-op. */
+	return (0);
 }
/*
@@ -1622,28 +2142,64 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
zfs_cmd_t zc = { 0 };
char msg[1024];
nvlist_t *tgt;
- boolean_t avail_spare, l2cache;
+ boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
- dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+ if (flags & ZFS_ONLINE_EXPAND) {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
+ } else {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+ }
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- NULL)) == NULL)
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
- if (avail_spare ||
- is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
+ if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ if (flags & ZFS_ONLINE_EXPAND ||
+ zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
+ char *pathname = NULL;
+ uint64_t wholedisk = 0;
+
+ (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk);
+ verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
+ &pathname) == 0);
+
+ /*
+ * XXX - L2ARC 1.0 devices can't support expansion.
+ */
+ if (l2cache) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot expand cache devices"));
+ return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
+ }
+
+ if (wholedisk) {
+ pathname += strlen(DISK_ROOT) + 1;
+ (void) zpool_relabel_disk(hdl, pathname);
+ }
+ }
+
zc.zc_cookie = VDEV_STATE_ONLINE;
zc.zc_obj = flags;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
+ if (errno == EINVAL) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
+ "from this pool into a new one. Use '%s' "
+ "instead"), "zpool detach");
+ return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
+ }
return (zpool_standard_error(hdl, errno, msg));
+ }
*newstate = zc.zc_cookie;
return (0);
@@ -1671,14 +2227,13 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
- if (avail_spare ||
- is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
+ if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
zc.zc_cookie = VDEV_STATE_OFFLINE;
zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
switch (errno) {
@@ -1689,6 +2244,12 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
*/
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+ case EEXIST:
+ /*
+ * The log device has unplayed logs
+ */
+ return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
+
default:
return (zpool_standard_error(hdl, errno, msg));
}
@@ -1698,7 +2259,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
* Mark the given vdev faulted.
*/
int
-zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
+zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
@@ -1710,8 +2271,9 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
zc.zc_cookie = VDEV_STATE_FAULTED;
+ zc.zc_obj = aux;
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
switch (errno) {
@@ -1722,12 +2284,6 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
*/
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
- case EEXIST:
- /*
- * The log device has unplayed logs
- */
- return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
-
default:
return (zpool_standard_error(hdl, errno, msg));
}
@@ -1738,7 +2294,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
* Mark the given vdev degraded.
*/
int
-zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
+zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
@@ -1750,8 +2306,9 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
zc.zc_cookie = VDEV_STATE_DEGRADED;
+ zc.zc_obj = aux;
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
@@ -1799,7 +2356,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
uint64_t val;
- char *path, *newname;
+ char *newname;
nvlist_t **child;
uint_t children;
nvlist_t *config_root;
@@ -1847,7 +2404,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
- if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
+ if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
return (-1);
/*
@@ -1865,32 +2422,25 @@ zpool_vdev_attach(zpool_handle_t *zhp,
return (zfs_error(hdl, EZFS_BADTARGET, msg));
}
- /*
- * If we are attempting to replace a spare, it canot be applied to an
- * already spared device.
- */
- if (replacing &&
- nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
- zpool_find_vdev(zhp, newname, &avail_spare,
- &l2cache, NULL) != NULL && avail_spare &&
- is_replacing_spare(config_root, tgt, 0)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "device has already been replaced with a spare"));
- free(newname);
- return (zfs_error(hdl, EZFS_BADTARGET, msg));
- }
-
free(newname);
if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
return (-1);
- ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
+ ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
zcmd_free_nvlists(&zc);
if (ret == 0) {
if (rootpool) {
+ /*
+ * XXX need a better way to prevent user from
+ * booting up a half-baked vdev.
+ */
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
+ "sure to wait until resilver is done "
+ "before rebooting.\n"));
+ (void) fprintf(stderr, "\n");
(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "If "
"you boot from pool '%s', you may need to update\n"
"boot code on newly attached disk '%s'.\n\n"
@@ -1910,9 +2460,16 @@ zpool_vdev_attach(zpool_handle_t *zhp,
* Can't attach to or replace this type of vdev.
*/
if (replacing) {
+ uint64_t version = zpool_get_prop_int(zhp,
+ ZPOOL_PROP_VERSION, NULL);
+
if (islog)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a log with a spare"));
+ else if (version >= SPA_VERSION_MULTI_REPLACE)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "already in replacing/spare config; wait "
+ "for completion or use 'zpool detach'"));
else
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a replacing device"));
@@ -2010,7 +2567,7 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
"applicable to mirror and replacing vdevs"));
- (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
+ (void) zfs_error(hdl, EZFS_BADTARGET, msg);
break;
case EBUSY:
@@ -2028,6 +2585,258 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
}
/*
+ * Find a mirror vdev in the source nvlist.
+ *
+ * The mchild array contains a list of disks in one of the top-level mirrors
+ * of the source pool. The schild array contains a list of disks that the
+ * user specified on the command line. We loop over the mchild array to
+ * see if any entry in the schild array matches.
+ *
+ * If a disk in the mchild array is found in the schild array, we return
+ * the index of that entry. Otherwise we return -1.
+ */
+static int
+find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
+    nvlist_t **schild, uint_t schildren)
+{
+	uint_t mc;
+
+	for (mc = 0; mc < mchildren; mc++) {
+		uint_t sc;
+		char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
+		    mchild[mc], B_FALSE);
+
+		/*
+		 * zpool_vdev_name() can return NULL (another caller in this
+		 * file checks for it); a name we could not obtain is
+		 * treated as a non-match instead of being passed to
+		 * strcmp().
+		 */
+		if (mpath == NULL)
+			continue;
+
+		for (sc = 0; sc < schildren; sc++) {
+			char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
+			    schild[sc], B_FALSE);
+			boolean_t result;
+
+			if (spath == NULL)
+				continue;
+
+			result = (strcmp(mpath, spath) == 0);
+
+			/* Both names are heap strings owned by this loop. */
+			free(spath);
+			if (result) {
+				free(mpath);
+				return ((int)mc);
+			}
+		}
+
+		free(mpath);
+	}
+
+	return (-1);
+}
+
+/*
+ * Split a mirror pool. If newroot points to null, then a new nvlist
+ * is generated and it is the responsibility of the caller to free it.
+ */
+/*
+ * Parameters:
+ *   zhp      - handle of the source pool.
+ *   newname  - name for the pool that is split off.
+ *   newroot  - in/out vdev tree for the new pool.  If *newroot is NULL a
+ *              tree is allocated here; it is released again (and *newroot
+ *              reset to NULL) on failure, and otherwise left for the
+ *              caller to free.
+ *   props    - optional properties for the new pool (may be NULL).
+ *   flags    - 'dryrun' stops after the tree is built, 'import' keeps the
+ *              new pool imported instead of exporting it after the split.
+ *
+ * Returns 0 on success, the result of zfs_error()/zpool_standard_error()
+ * for reported failures, no_memory() when an allocation or nvlist
+ * operation fails, or -1 for the early validation failures.
+ */
+int
+zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
+    nvlist_t *props, splitflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
+	nvlist_t **varray = NULL, *zc_props = NULL;
+	uint_t c, children, newchildren, lastlog = 0, vcount = 0, found = 0;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	uint64_t vers;
+	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
+	int retval = 0;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
+
+	if (!zpool_name_valid(hdl, B_FALSE, newname))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+		(void) fprintf(stderr, gettext("Internal error: unable to "
+		    "retrieve pool configuration\n"));
+		return (-1);
+	}
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
+	    == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
+
+	if (props) {
+		/* Named so that it does not shadow the 'flags' parameter. */
+		prop_flags_t pflags = { .create = B_FALSE, .import = B_TRUE };
+		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
+		    props, vers, pflags, msg)) == NULL)
+			return (-1);
+	}
+
+	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Source pool is missing vdev tree"));
+		if (zc_props)
+			nvlist_free(zc_props);
+		return (-1);
+	}
+
+	/*
+	 * One slot per top-level vdev of the source pool.  An allocation
+	 * failure is routed through the common cleanup path, which reports
+	 * it as a memory error.
+	 */
+	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
+	if (varray == NULL)
+		goto out;
+
+	if (*newroot == NULL ||
+	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
+	    &newchild, &newchildren) != 0)
+		newchildren = 0;
+
+	for (c = 0; c < children; c++) {
+		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
+		char *type;
+		nvlist_t **mchild, *vdev;
+		uint_t mchildren;
+		int entry;
+
+		/*
+		 * Unlike cache & spares, slogs are stored in the
+		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
+		 */
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &is_hole);
+		if (is_log || is_hole) {
+			/*
+			 * Create a hole vdev and put it in the config.
+			 * Until it is stored in varray the new list is
+			 * owned here, so it is freed on failure.
+			 */
+			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
+				goto out;
+			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
+			    VDEV_TYPE_HOLE) != 0 ||
+			    nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
+			    1) != 0) {
+				nvlist_free(vdev);
+				goto out;
+			}
+			/* Remember where the current run of holes began. */
+			if (lastlog == 0)
+				lastlog = vcount;
+			varray[vcount++] = vdev;
+			continue;
+		}
+		lastlog = 0;
+		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
+		    == 0);
+		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Source pool must be composed only of mirrors\n"));
+			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+			goto out;
+		}
+
+		verify(nvlist_lookup_nvlist_array(child[c],
+		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+
+		/* find or add an entry for this top-level vdev */
+		if (newchildren > 0 &&
+		    (entry = find_vdev_entry(zhp, mchild, mchildren,
+		    newchild, newchildren)) >= 0) {
+			/* We found a disk that the user specified. */
+			vdev = mchild[entry];
+			++found;
+		} else {
+			/* User didn't specify a disk for this vdev. */
+			vdev = mchild[mchildren - 1];
+		}
+
+		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
+			goto out;
+	}
+
+	/* did we find every disk the user specified? */
+	if (found != newchildren) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
+		    "include at most one disk from each mirror"));
+		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		goto out;
+	}
+
+	/* Prepare the nvlist for populating. */
+	if (*newroot == NULL) {
+		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
+			goto out;
+		freelist = B_TRUE;
+		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0)
+			goto out;
+	} else {
+		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
+	}
+
+	/*
+	 * Add all the children we found.  When the array ends in a run of
+	 * holes, only the entries before that run are added.
+	 */
+	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
+	    lastlog == 0 ? vcount : lastlog) != 0)
+		goto out;
+
+	/*
+	 * If we're just doing a dry run, exit now with success.
+	 */
+	if (flags.dryrun) {
+		memory_err = B_FALSE;
+		freelist = B_FALSE;
+		goto out;
+	}
+
+	/* now build up the config list & call the ioctl */
+	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
+		goto out;
+
+	if (nvlist_add_nvlist(newconfig,
+	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
+	    nvlist_add_string(newconfig,
+	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
+	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
+		goto out;
+
+	/*
+	 * The new pool is automatically part of the namespace unless we
+	 * explicitly export it.
+	 */
+	if (!flags.import)
+		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
+	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
+		goto out;
+	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
+		goto out;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
+		retval = zpool_standard_error(hdl, errno, msg);
+		goto out;
+	}
+
+	freelist = B_FALSE;
+	memory_err = B_FALSE;
+
+out:
+	/*
+	 * Common cleanup.  'memory_err' stays set unless one of the success
+	 * paths cleared it, so any bare "goto out" is reported as a memory
+	 * failure when no explicit retval was recorded.
+	 */
+	if (varray != NULL) {
+		uint_t v;
+
+		for (v = 0; v < vcount; v++)
+			nvlist_free(varray[v]);
+		free(varray);
+	}
+	zcmd_free_nvlists(&zc);
+	if (zc_props)
+		nvlist_free(zc_props);
+	if (newconfig)
+		nvlist_free(newconfig);
+	if (freelist) {
+		nvlist_free(*newroot);
+		*newroot = NULL;
+	}
+
+	if (retval != 0)
+		return (retval);
+
+	if (memory_err)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/*
* Remove the given device. Currently, this is supported only for hot spares
* and level 2 cache devices.
*/
@@ -2037,24 +2846,34 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
zfs_cmd_t zc = { 0 };
char msg[1024];
nvlist_t *tgt;
- boolean_t avail_spare, l2cache;
+ boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
+ uint64_t version;
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- NULL)) == 0)
+ &islog)) == 0)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
-
- if (!avail_spare && !l2cache) {
+ /*
+ * XXX - this should just go away.
+ */
+ if (!avail_spare && !l2cache && !islog) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "only inactive hot spares or cache devices "
- "can be removed"));
+ "only inactive hot spares, cache, top-level, "
+ "or log devices can be removed"));
return (zfs_error(hdl, EZFS_NODEVICE, msg));
}
+ version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+ if (islog && version < SPA_VERSION_HOLES) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgrade to support log removal"));
+ return (zfs_error(hdl, EZFS_BADVERSION, msg));
+ }
+
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
@@ -2067,13 +2886,16 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
* Clear the errors for the pool, or the particular device if specified.
*/
int
-zpool_clear(zpool_handle_t *zhp, const char *path)
+zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
nvlist_t *tgt;
+ zpool_rewind_policy_t policy;
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
+ nvlist_t *nvi = NULL;
+ int error;
if (path)
(void) snprintf(msg, sizeof (msg),
@@ -2101,9 +2923,38 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
&zc.zc_guid) == 0);
}
- if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
+ zpool_get_rewind_policy(rewindnvl, &policy);
+ zc.zc_cookie = policy.zrp_request;
+
+ if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
+ return (-1);
+
+ if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
+ return (-1);
+
+ while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
+ errno == ENOMEM) {
+ if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+ zcmd_free_nvlists(&zc);
+ return (-1);
+ }
+ }
+
+ if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
+ errno != EPERM && errno != EACCES)) {
+ if (policy.zrp_request &
+ (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
+ (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
+ zpool_rewind_exclaim(hdl, zc.zc_name,
+ ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
+ nvi);
+ nvlist_free(nvi);
+ }
+ zcmd_free_nvlists(&zc);
return (0);
+ }
+ zcmd_free_nvlists(&zc);
return (zpool_standard_error(hdl, errno, msg));
}
@@ -2123,6 +2974,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_guid = guid;
+ zc.zc_cookie = ZPOOL_NO_REWIND;
if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
return (0);
@@ -2131,173 +2983,6 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
}
/*
- * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
- * hierarchy.
- */
-int
-zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
- void *data)
-{
- libzfs_handle_t *hdl = zhp->zpool_hdl;
- char (*paths)[MAXPATHLEN];
- char path[MAXPATHLEN];
- size_t size = 4;
- int curr, fd, base, ret = 0;
- DIR *dirp;
- struct dirent *dp;
- struct stat st;
-
- if ((base = open(ZVOL_FULL_DEV_DIR, O_RDONLY)) < 0)
- return (errno == ENOENT ? 0 : -1);
-
- snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
- zhp->zpool_name);
- if (stat(path, &st) != 0) {
- int err = errno;
- (void) close(base);
- return (err == ENOENT ? 0 : -1);
- }
-
- /*
- * Oddly this wasn't a directory -- ignore that failure since we
- * know there are no links lower in the (non-existant) hierarchy.
- */
- if (!S_ISDIR(st.st_mode)) {
- (void) close(base);
- return (0);
- }
-
- if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
- (void) close(base);
- return (-1);
- }
-
- (void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
- curr = 0;
-
- while (curr >= 0) {
- snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
- paths[curr]);
- if (lstat(path, &st) != 0)
- goto err;
-
- if (S_ISDIR(st.st_mode)) {
- if ((dirp = opendir(path)) == NULL) {
- goto err;
- }
-
- while ((dp = readdir(dirp)) != NULL) {
- if (dp->d_name[0] == '.')
- continue;
-
- if (curr + 1 == size) {
- paths = zfs_realloc(hdl, paths,
- size * sizeof (paths[0]),
- size * 2 * sizeof (paths[0]));
- if (paths == NULL) {
- (void) closedir(dirp);
- goto err;
- }
-
- size *= 2;
- }
-
- (void) strlcpy(paths[curr + 1], paths[curr],
- sizeof (paths[curr + 1]));
- (void) strlcat(paths[curr], "/",
- sizeof (paths[curr]));
- (void) strlcat(paths[curr], dp->d_name,
- sizeof (paths[curr]));
- curr++;
- }
-
- (void) closedir(dirp);
-
- } else {
- if ((ret = cb(paths[curr], data)) != 0)
- break;
- }
-
- curr--;
- }
-
- free(paths);
- (void) close(base);
-
- return (ret);
-
-err:
- free(paths);
- (void) close(base);
- return (-1);
-}
-
-typedef struct zvol_cb {
- zpool_handle_t *zcb_pool;
- boolean_t zcb_create;
-} zvol_cb_t;
-
-/*ARGSUSED*/
-static int
-do_zvol_create(zfs_handle_t *zhp, void *data)
-{
- int ret = 0;
-
- if (ZFS_IS_VOLUME(zhp)) {
- (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
- ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
- }
-
- if (ret == 0)
- ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
-
- zfs_close(zhp);
-
- return (ret);
-}
-
-/*
- * Iterate over all zvols in the pool and make any necessary minor nodes.
- */
-int
-zpool_create_zvol_links(zpool_handle_t *zhp)
-{
- zfs_handle_t *zfp;
- int ret;
-
- /*
- * If the pool is unavailable, just return success.
- */
- if ((zfp = make_dataset_handle(zhp->zpool_hdl,
- zhp->zpool_name)) == NULL)
- return (0);
-
- ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
-
- zfs_close(zfp);
- return (ret);
-}
-
-static int
-do_zvol_remove(const char *dataset, void *data)
-{
- zpool_handle_t *zhp = data;
-
- return (zvol_remove_link(zhp->zpool_hdl, dataset));
-}
-
-/*
- * Iterate over all zvols in the pool and remove any minor nodes. We iterate
- * by examining the /dev links so that a corrupted pool doesn't impede this
- * operation.
- */
-int
-zpool_remove_zvol_links(zpool_handle_t *zhp)
-{
- return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
-}
-
-/*
* Convert from a devid string to a path.
*/
static char *
@@ -2389,7 +3074,8 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
* of these checks.
*/
char *
-zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
+ boolean_t verbose)
{
char *path, *devid;
uint64_t value;
@@ -2412,7 +3098,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
* open a misbehaving device, which can have undesirable
* effects.
*/
- if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+ if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) != 0 ||
vs->vs_state >= VDEV_STATE_DEGRADED) &&
zhp != NULL &&
@@ -2444,17 +3130,35 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
devid_str_free(newdevid);
}
- if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- path += sizeof(_PATH_DEV) - 1;
+#ifdef sun
+ if (strncmp(path, "/dev/dsk/", 9) == 0)
+ path += 9;
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&value) == 0 && value) {
+ int pathlen = strlen(path);
char *tmp = zfs_strdup(hdl, path);
- if (tmp == NULL)
- return (NULL);
- tmp[strlen(path) - 2] = '\0';
+
+ /*
+ * If it starts with c#, and ends with "s0", chop
+ * the "s0" off, or if it ends with "s0/old", remove
+ * the "s0" from the middle.
+ */
+ if (CTD_CHECK(tmp)) {
+ if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
+ tmp[pathlen - 2] = '\0';
+ } else if (pathlen > 6 &&
+ strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
+ (void) strcpy(&tmp[pathlen - 6],
+ "/old");
+ }
+ }
return (tmp);
}
+#else /* !sun */
+ if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ path += sizeof(_PATH_DEV) - 1;
+#endif /* !sun */
} else {
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
@@ -2468,6 +3172,20 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
(u_longlong_t)value);
path = buf;
}
+
+ /*
+ * We identify each top-level vdev by using a <type-id>
+ * naming convention.
+ */
+ if (verbose) {
+ uint64_t id;
+
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+ &id) == 0);
+ (void) snprintf(buf, sizeof (buf), "%s-%llu", path,
+ (u_longlong_t)id);
+ path = buf;
+ }
}
return (zfs_strdup(hdl, path));
@@ -2686,7 +3404,7 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
* into 'records'. 'leftover' is set to the number of bytes that weren't
* processed as there wasn't a complete record.
*/
-static int
+int
zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
nvlist_t ***records, uint_t *numrecords)
{
@@ -2815,15 +3533,7 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
free(mntpnt);
}
-#define RDISK_ROOT "/dev/rdsk"
-#define BACKUP_SLICE "s2"
-/*
- * Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
- */
-#define NEW_START_BLOCK 256
-
-#if defined(sun)
+#ifdef sun
/*
* Read the EFI label from the config, if a label does not exist then
* pass back the error to the caller. If the caller has passed a non-NULL
@@ -2897,7 +3607,7 @@ find_start_block(nvlist_t *config)
int
zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
{
-#if defined(sun)
+#ifdef sun
char path[MAXPATHLEN];
struct dk_gpt *vtoc;
int fd;
@@ -3017,6 +3727,7 @@ supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
strcmp(type, VDEV_TYPE_FILE) == 0 ||
strcmp(type, VDEV_TYPE_LOG) == 0 ||
+ strcmp(type, VDEV_TYPE_HOLE) == 0 ||
strcmp(type, VDEV_TYPE_MISSING) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"vdev type '%s' is not supported"), type);
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
index cdde90a..9d3c984 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <assert.h>
@@ -36,24 +35,396 @@
#include <fcntl.h>
#include <sys/param.h>
#include <sys/mount.h>
-#include <sys/mntent.h>
-#include <sys/mnttab.h>
-#include <sys/avl.h>
-#include <stddef.h>
+#include <pthread.h>
+#include <umem.h>
#include <libzfs.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
+#include "zfs_fletcher.h"
#include "libzfs_impl.h"
+#include <sha2.h>
+#include <sys/zio_checksum.h>
+#include <sys/ddt.h>
-#include <fletcher.c> /* XXX */
-
+/* in libzfs_dataset.c */
+extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
#define ENODATA EIDRM
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
- int, avl_tree_t *, char **);
+ int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
+
+static const zio_cksum_t zero_cksum = { 0 };
+
+typedef struct dedup_arg { /* argument block handed to the cksummer() thread */
+ int inputfd; /* cksummer() fdopen()s this for reading the incoming records */
+ int outputfd; /* cksummer() writes the rewritten stream to this descriptor */
+ libzfs_handle_t *dedup_hdl; /* passed on to ddt_update() for error reporting */
+} dedup_arg_t;
+
+typedef struct dataref { /* identifies where a block was first written in the stream */
+ uint64_t ref_guid; /* cksummer() fills this from the write record's drr_toguid */
+ uint64_t ref_object; /* cksummer() fills this from the write record's drr_object */
+ uint64_t ref_offset; /* cksummer() fills this from the write record's drr_offset */
+} dataref_t;
+
+typedef struct dedup_entry { /* one node of a dedup table hash chain */
+ struct dedup_entry *dde_next; /* next entry in the same bucket; NULL at the tail */
+ zio_cksum_t dde_chksum; /* checksum compared by ddt_update() */
+ uint64_t dde_prop; /* must also match for ddt_update() to report a hit */
+ dataref_t dde_ref; /* reference copied back to the caller on a hit */
+} dedup_entry_t;
+
+#define MAX_DDT_PHYSMEM_PERCENT 20
+#define SMALLEST_POSSIBLE_MAX_DDT_MB 128
+
+typedef struct dedup_table { /* in-memory dedup table built and owned by cksummer() */
+ dedup_entry_t **dedup_hash_array; /* bucket heads; index is BF64_GET(zc_word[0], 0, numhashbits) */
+ umem_cache_t *ddecache; /* cache that dedup_entry_t nodes are allocated from */
+ uint64_t max_ddt_size; /* max dedup table size in bytes */
+ uint64_t cur_ddt_size; /* current dedup table size in bytes */
+ uint64_t ddt_count; /* bumped by ddt_hash_append() for every entry added */
+ int numhashbits; /* bit count handed to BF64_GET() when hashing a checksum */
+ boolean_t ddt_full; /* set once the "table full" message has been issued */
+} dedup_table_t;
+
+/*
+ * Return the 1-based position of the most significant set bit in n,
+ * i.e. the number of right shifts needed to reduce n to zero.
+ * Returns 0 when n is 0.
+ */
+static int
+high_order_bit(uint64_t n)
+{
+	int nbits = 0;
+
+	while (n != 0) {
+		n >>= 1;
+		nbits++;
+	}
+	return (nbits);
+}
+
+/*
+ * Read a single item of len bytes from stream into buf.
+ *
+ * The return value is fread()'s item count: 1 when the whole item was
+ * read, 0 when it was not (short read, EOF or error).
+ */
+static size_t
+ssread(void *buf, size_t len, FILE *stream)
+{
+	size_t nitems = fread(buf, len, 1, stream);
+
+	return (nitems);
+}
+
+/*
+ * Allocate a dedup entry for checksum *cs / prop with reference *dr and
+ * store it in the chain slot *ddepp, which must currently be NULL.
+ *
+ * Nothing is added once cur_ddt_size has reached max_ddt_size; the first
+ * time that happens a message is recorded through zfs_error_aux() and
+ * ddt_full is set.  A failed cache allocation is silently ignored.
+ */
+static void
+ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
+    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
+{
+	dedup_entry_t *entry;
+
+	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
+		/* Only report the condition the first time it is hit. */
+		if (ddt->ddt_full != B_FALSE)
+			return;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Dedup table full. Deduplication will continue "
+		    "with existing table entries"));
+		ddt->ddt_full = B_TRUE;
+		return;
+	}
+
+	entry = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT);
+	if (entry == NULL)
+		return;
+
+	assert(*ddepp == NULL);
+	entry->dde_next = NULL;
+	entry->dde_chksum = *cs;
+	entry->dde_prop = prop;
+	entry->dde_ref = *dr;
+	*ddepp = entry;
+	ddt->cur_ddt_size += sizeof (dedup_entry_t);
+	ddt->ddt_count++;
+}
+
+/*
+ * Look up checksum *cs (qualified by prop) in the dedup table.
+ *
+ * On a hit, the stored block reference is copied into *dr.  On a miss,
+ * ddt_hash_append() is asked to add a new entry at the tail of the
+ * bucket's chain, using the reference the caller supplied in *dr.
+ *
+ * return value:  B_TRUE  - entry was found
+ *                B_FALSE - entry was not found
+ */
+static boolean_t
+ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
+    uint64_t prop, dataref_t *dr)
+{
+	uint32_t bucket = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
+	dedup_entry_t **linkp = &ddt->dedup_hash_array[bucket];
+
+	while (*linkp != NULL) {
+		dedup_entry_t *cur = *linkp;
+
+		if (cur->dde_prop == prop &&
+		    ZIO_CHECKSUM_EQUAL(cur->dde_chksum, *cs)) {
+			*dr = cur->dde_ref;
+			return (B_TRUE);
+		}
+		linkp = &cur->dde_next;
+	}
+
+	/* linkp now addresses the NULL link that terminates the chain. */
+	ddt_hash_append(hdl, ddt, linkp, cs, prop, dr);
+	return (B_FALSE);
+}
+
+/*
+ * Fold buf into the running fletcher-4 stream checksum *zc, then write
+ * all len bytes of buf to outfd.
+ *
+ * write(2) is allowed to transfer fewer bytes than requested (typical
+ * for pipes and sockets), so keep writing until the whole buffer has
+ * gone out; otherwise the checksum would cover bytes that never reached
+ * the output stream.
+ *
+ * Returns -1 if a write() fails or cannot make progress, otherwise the
+ * number of bytes written (len), matching what the single write() call
+ * used here previously returned for a complete write.  Callers only
+ * compare the result against -1.
+ */
+static int
+cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+{
+	const char *cursor = buf;
+	uint64_t resid = len;
+
+	fletcher_4_incremental_native(buf, len, zc);
+
+	while (resid > 0) {
+		ssize_t nwritten = write(outfd, cursor, resid);
+
+		/* A zero-byte result for a non-empty request is a failure too. */
+		if (nwritten <= 0)
+			return (-1);
+		cursor += nwritten;
+		resid -= (uint64_t)nwritten;
+	}
+	return ((int)len);
+}
+
+/*
+ * This function is started in a separate thread when the dedup option
+ * has been requested.  The main send thread determines the list of
+ * snapshots to be included in the send stream and makes the ioctl calls
+ * for each one.  But instead of having the ioctl send the output to the
+ * output fd specified by the caller of zfs_send(), the
+ * ioctl is told to direct the output to a pipe, which is read by the
+ * alternate thread running THIS function.  This function does the
+ * dedup'ing by:
+ *  1. building a dedup table (the DDT)
+ *  2. doing checksums on each data block and inserting a record in the DDT
+ *  3. looking for matching checksums, and
+ *  4. sending a DRR_WRITE_BYREF record instead of a write record whenever
+ *     a duplicate block is found.
+ * The output of this function then goes to the output fd requested
+ * by the caller of zfs_send().
+ *
+ * arg points to a dedup_arg_t: records are read from inputfd, the
+ * rewritten stream is written to outputfd, and dedup_hdl is used when
+ * reporting that the dedup table is full.  The stream checksum is
+ * recomputed over the records actually written and stored in the
+ * DRR_END record.  Processing stops at end of input or on the first
+ * failed write.  Always returns NULL.
+ *
+ * NOTE(review): the results of malloc(), calloc(), umem_cache_create(),
+ * fdopen() and the payload ssread() calls are not checked here.
+ */
+static void *
+cksummer(void *arg)
+{
+	dedup_arg_t *dda = arg;
+	char *buf = malloc(1<<20);
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
+	struct drr_end *drre = &thedrr.drr_u.drr_end;
+	struct drr_object *drro = &thedrr.drr_u.drr_object;
+	struct drr_write *drrw = &thedrr.drr_u.drr_write;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
+	FILE *ofp;
+	int outfd;
+	dmu_replay_record_t wbr_drr = {0};
+	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
+	dedup_table_t ddt;
+	zio_cksum_t stream_cksum;
+	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
+	uint64_t numbuckets;
+
+	ddt.max_ddt_size =
+	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
+	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+
+	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+
+	/*
+	 * numbuckets must be a power of 2.  Increase number to
+	 * a power of 2 if necessary.  The shift is done in 64 bits:
+	 * on hosts with a lot of memory the bit position can reach 31
+	 * or more, which is undefined for a plain int constant.
+	 */
+	if (!ISP2(numbuckets))
+		numbuckets = 1ULL << high_order_bit(numbuckets);
+
+	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
+	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
+	/* ddt lives on the stack, so the entry counter must start at zero. */
+	ddt.ddt_count = 0;
+	ddt.numhashbits = high_order_bit(numbuckets) - 1;
+	ddt.ddt_full = B_FALSE;
+
+	/* Initialize the write-by-reference block. */
+	wbr_drr.drr_type = DRR_WRITE_BYREF;
+	wbr_drr.drr_payloadlen = 0;
+
+	outfd = dda->outputfd;
+	ofp = fdopen(dda->inputfd, "r");
+	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			int	fflags;
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			/* set the DEDUP feature flag for this stream */
+			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
+			    DMU_BACKUP_FEATURE_DEDUPPROPS);
+			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
+
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
+				int sz = drr->drr_payloadlen;
+
+				if (sz > 1<<20) {
+					free(buf);
+					buf = malloc(sz);
+				}
+				(void) ssread(buf, sz, ofp);
+				/* The payload was read from ofp, not stdin. */
+				if (ferror(ofp))
+					perror("fread");
+				if (cksum_and_write(buf, sz, &stream_cksum,
+				    outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_END:
+		{
+			/* use the recalculated checksum */
+			ZIO_SET_CHECKSUM(&drre->drr_checksum,
+			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
+			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
+			if ((write(outfd, drr,
+			    sizeof (dmu_replay_record_t))) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			if (drro->drr_bonuslen > 0) {
+				(void) ssread(buf,
+				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+				    ofp);
+				if (cksum_and_write(buf,
+				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+				    &stream_cksum, outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			(void) ssread(buf, drrs->drr_length, ofp);
+			if (cksum_and_write(buf, drrs->drr_length,
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_FREEOBJECTS:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		case DRR_WRITE:
+		{
+			dataref_t	dataref;
+
+			(void) ssread(buf, drrw->drr_length, ofp);
+
+			/*
+			 * Use the existing checksum if it's dedup-capable,
+			 * else calculate a SHA256 checksum for it.
+			 */
+
+			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
+			    zero_cksum) ||
+			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
+				SHA256_CTX	ctx;
+				zio_cksum_t	tmpsha256;
+
+				SHA256Init(&ctx);
+				SHA256Update(&ctx, buf, drrw->drr_length);
+				SHA256Final(&tmpsha256, &ctx);
+				drrw->drr_key.ddk_cksum.zc_word[0] =
+				    BE_64(tmpsha256.zc_word[0]);
+				drrw->drr_key.ddk_cksum.zc_word[1] =
+				    BE_64(tmpsha256.zc_word[1]);
+				drrw->drr_key.ddk_cksum.zc_word[2] =
+				    BE_64(tmpsha256.zc_word[2]);
+				drrw->drr_key.ddk_cksum.zc_word[3] =
+				    BE_64(tmpsha256.zc_word[3]);
+				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
+				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
+			}
+
+			dataref.ref_guid = drrw->drr_toguid;
+			dataref.ref_object = drrw->drr_object;
+			dataref.ref_offset = drrw->drr_offset;
+
+			if (ddt_update(dda->dedup_hdl, &ddt,
+			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
+			    &dataref)) {
+				/* block already present in stream */
+				wbr_drrr->drr_object = drrw->drr_object;
+				wbr_drrr->drr_offset = drrw->drr_offset;
+				wbr_drrr->drr_length = drrw->drr_length;
+				wbr_drrr->drr_toguid = drrw->drr_toguid;
+				wbr_drrr->drr_refguid = dataref.ref_guid;
+				wbr_drrr->drr_refobject =
+				    dataref.ref_object;
+				wbr_drrr->drr_refoffset =
+				    dataref.ref_offset;
+
+				wbr_drrr->drr_checksumtype =
+				    drrw->drr_checksumtype;
+				/* Copy the flags field, not the type again. */
+				wbr_drrr->drr_checksumflags =
+				    drrw->drr_checksumflags;
+				wbr_drrr->drr_key.ddk_cksum =
+				    drrw->drr_key.ddk_cksum;
+				wbr_drrr->drr_key.ddk_prop =
+				    drrw->drr_key.ddk_prop;
+
+				if (cksum_and_write(&wbr_drr,
+				    sizeof (dmu_replay_record_t), &stream_cksum,
+				    outfd) == -1)
+					goto out;
+			} else {
+				/* block not previously seen */
+				if (cksum_and_write(drr,
+				    sizeof (dmu_replay_record_t), &stream_cksum,
+				    outfd) == -1)
+					goto out;
+				if (cksum_and_write(buf,
+				    drrw->drr_length,
+				    &stream_cksum, outfd) == -1)
+					goto out;
+			}
+			break;
+		}
+
+		case DRR_FREE:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
+		default:
+			(void) printf("INVALID record type 0x%x\n",
+			    drr->drr_type);
+			/* should never happen, so assert */
+			assert(B_FALSE);
+		}
+	}
+out:
+	umem_cache_destroy(ddt.ddecache);
+	free(ddt.dedup_hash_array);
+	free(buf);
+	(void) fclose(ofp);
+
+	return (NULL);
+}
/*
* Routines for dealing with the AVL tree of fs-nvlists
@@ -116,6 +487,9 @@ fsavl_destroy(avl_tree_t *avl)
free(avl);
}
+/*
+ * Given an nvlist, produce an avl tree of snapshots, ordered by guid
+ */
static avl_tree_t *
fsavl_create(nvlist_t *fss)
{
@@ -173,6 +547,7 @@ typedef struct send_data {
nvlist_t *snapprops;
const char *fromsnap;
const char *tosnap;
+ boolean_t recursive;
/*
* The header nvlist is of the following format:
@@ -240,25 +615,50 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
zfs_prop_t prop = zfs_name_to_prop(propname);
nvlist_t *propnv;
- assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
+ if (!zfs_prop_user(propname)) {
+ /*
+ * Realistically, this should never happen. However,
+ * we want the ability to add DSL properties without
+ * needing to make incompatible version changes. We
+ * need to ignore unknown properties to allow older
+ * software to still send datasets containing these
+ * properties, with the unknown properties elided.
+ */
+ if (prop == ZPROP_INVAL)
+ continue;
- if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
- continue;
+ if (zfs_prop_readonly(prop))
+ continue;
+ }
verify(nvpair_value_nvlist(elem, &propnv) == 0);
- if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) {
- /* these guys are modifyable, but have no source */
+ if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
+ prop == ZFS_PROP_REFQUOTA ||
+ prop == ZFS_PROP_REFRESERVATION) {
+ char *source;
uint64_t value;
verify(nvlist_lookup_uint64(propnv,
ZPROP_VALUE, &value) == 0);
if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
continue;
+ /*
+ * May have no source before SPA_VERSION_RECVD_PROPS,
+ * but is still modifiable.
+ */
+ if (nvlist_lookup_string(propnv,
+ ZPROP_SOURCE, &source) == 0) {
+ if ((strcmp(source, zhp->zfs_name) != 0) &&
+ (strcmp(source,
+ ZPROP_SOURCE_VAL_RECVD) != 0))
+ continue;
+ }
} else {
char *source;
if (nvlist_lookup_string(propnv,
ZPROP_SOURCE, &source) != 0)
continue;
- if (strcmp(source, zhp->zfs_name) != 0)
+ if ((strcmp(source, zhp->zfs_name) != 0) &&
+ (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
continue;
}
@@ -277,12 +677,17 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
}
}
+/*
+ * recursively generate nvlists describing datasets. See comment
+ * for the data structure send_data_t above for description of contents
+ * of the nvlist.
+ */
static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
send_data_t *sd = arg;
nvlist_t *nvfs, *nv;
- int rv;
+ int rv = 0;
uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
uint64_t guid = zhp->zfs_dmustats.dds_guid;
char guidstring[64];
@@ -324,7 +729,8 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
nvlist_free(nvfs);
/* iterate over children */
- rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
+ if (sd->recursive)
+ rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
@@ -334,7 +740,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
static int
gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
- const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
+ const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
{
zfs_handle_t *zhp;
send_data_t sd = { 0 };
@@ -347,6 +753,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
sd.fromsnap = fromsnap;
sd.tosnap = tosnap;
+ sd.recursive = recursive;
if ((error = send_iterate_fs(zhp, &sd)) != 0) {
nvlist_free(sd.fss);
@@ -378,14 +785,30 @@ static int
zfs_sort_snaps(zfs_handle_t *zhp, void *data)
{
avl_tree_t *avl = data;
- zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
+ zfs_node_t *node;
+ zfs_node_t search;
+
+ search.zn_handle = zhp;
+ node = avl_find(avl, &search, NULL);
+ if (node) {
+ /*
+ * If this snapshot was renamed while we were creating the
+ * AVL tree, it's possible that we already inserted it under
+ * its old name. Remove the old handle before adding the new
+ * one.
+ */
+ zfs_close(node->zn_handle);
+ avl_remove(avl, node);
+ free(node);
+ }
+ node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
node->zn_handle = zhp;
avl_add(avl, node);
+
return (0);
}
-/* ARGSUSED */
static int
zfs_snapshot_compare(const void *larg, const void *rarg)
{
@@ -408,7 +831,7 @@ zfs_snapshot_compare(const void *larg, const void *rarg)
return (0);
}
-static int
+int
zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
{
int ret = 0;
@@ -439,13 +862,19 @@ typedef struct send_dump_data {
/* these are all just the short snapname (the part after the @) */
const char *fromsnap;
const char *tosnap;
- char lastsnap[ZFS_MAXNAMELEN];
+ char prevsnap[ZFS_MAXNAMELEN];
+ uint64_t prevsnap_obj;
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose;
int outfd;
boolean_t err;
nvlist_t *fss;
avl_tree_t *fsavl;
+ snapfilter_cb_t *filter_cb;
+ void *filter_cb_arg;
+ nvlist_t *debugnv;
+ char holdtag[ZFS_MAXNAMELEN];
+ int cleanup_fd;
} send_dump_data_t;
/*
@@ -453,26 +882,40 @@ typedef struct send_dump_data {
* NULL) to the file descriptor specified by outfd.
*/
static int
-dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
- int outfd)
+dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
+ boolean_t fromorigin, int outfd, nvlist_t *debugnv)
{
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
+ nvlist_t *thisdbg;
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
- assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
+ assert(fromsnap_obj == 0 || !fromorigin);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
- if (fromsnap)
- (void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
zc.zc_cookie = outfd;
zc.zc_obj = fromorigin;
+ zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
+ zc.zc_fromobj = fromsnap_obj;
+
+ VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
+ if (fromsnap && fromsnap[0] != '\0') {
+ VERIFY(0 == nvlist_add_string(thisdbg,
+ "fromsnap", fromsnap));
+ }
if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
+ VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
+ if (debugnv) {
+ VERIFY(0 == nvlist_add_nvlist(debugnv,
+ zhp->zfs_name, thisdbg));
+ }
+ nvlist_free(thisdbg);
+
switch (errno) {
case EXDEV:
@@ -507,24 +950,74 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
}
}
+ if (debugnv)
+ VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
+ nvlist_free(thisdbg);
+
return (0);
}
+/*
+ * Place a hold, tagged with sdd->holdtag, on the snapshot zhp before it
+ * is sent.  The hold is requested through a handle on the snapshot's
+ * parent dataset and is associated with sdd->cleanup_fd.
+ *
+ * Returns 0 when no cleanup_fd is in use or when the parent dataset can
+ * no longer be opened; otherwise returns whatever zfs_hold() returns.
+ */
 static int
+hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+{
+	zfs_handle_t *parent;
+	char *at;
+	char *snapname;
+	int rv;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+	/*
+	 * zfs_send() only opens a cleanup_fd for sends that need it,
+	 * e.g. replication and doall.
+	 */
+	if (sdd->cleanup_fd == -1)
+		return (0);
+
+	/* Temporarily cut the name at '@' to open the parent dataset. */
+	at = strchr(zhp->zfs_name, '@');
+	snapname = at + 1;
+	*at = '\0';
+	parent = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
+	*at = '@';
+
+	/*
+	 * It's OK if the parent no longer exists.  The send code will
+	 * handle that error.
+	 */
+	if (parent == NULL)
+		return (0);
+
+	rv = zfs_hold(parent, snapname, sdd->holdtag,
+	    B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
+	    zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
+	    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
+	zfs_close(parent);
+
+	return (rv);
+}
+
+static int
dump_snapshot(zfs_handle_t *zhp, void *arg)
{
send_dump_data_t *sdd = arg;
- const char *thissnap;
+ char *thissnap;
int err;
+ boolean_t isfromsnap, istosnap;
+ boolean_t exclude = B_FALSE;
thissnap = strchr(zhp->zfs_name, '@') + 1;
+ isfromsnap = (sdd->fromsnap != NULL &&
+ strcmp(sdd->fromsnap, thissnap) == 0);
- if (sdd->fromsnap && !sdd->seenfrom &&
- strcmp(sdd->fromsnap, thissnap) == 0) {
- sdd->seenfrom = B_TRUE;
- (void) strcpy(sdd->lastsnap, thissnap);
+ if (!sdd->seenfrom && isfromsnap) {
+ err = hold_for_send(zhp, sdd);
+ if (err == 0) {
+ sdd->seenfrom = B_TRUE;
+ (void) strcpy(sdd->prevsnap, thissnap);
+ sdd->prevsnap_obj = zfs_prop_get_int(zhp,
+ ZFS_PROP_OBJSETID);
+ } else if (err == ENOENT) {
+ err = 0;
+ }
zfs_close(zhp);
- return (0);
+ return (err);
}
if (sdd->seento || !sdd->seenfrom) {
@@ -532,20 +1025,69 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
return (0);
}
+ istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
+ if (istosnap)
+ sdd->seento = B_TRUE;
+
+ if (!sdd->doall && !isfromsnap && !istosnap) {
+ if (sdd->replicate) {
+ char *snapname;
+ nvlist_t *snapprops;
+ /*
+ * Filter out all intermediate snapshots except origin
+ * snapshots needed to replicate clones.
+ */
+ nvlist_t *nvfs = fsavl_find(sdd->fsavl,
+ zhp->zfs_dmustats.dds_guid, &snapname);
+
+ VERIFY(0 == nvlist_lookup_nvlist(nvfs,
+ "snapprops", &snapprops));
+ VERIFY(0 == nvlist_lookup_nvlist(snapprops,
+ thissnap, &snapprops));
+ exclude = !nvlist_exists(snapprops, "is_clone_origin");
+ } else {
+ exclude = B_TRUE;
+ }
+ }
+
+ /*
+ * If a filter function exists, call it to determine whether
+ * this snapshot will be sent.
+ */
+ if (exclude || (sdd->filter_cb != NULL &&
+ sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
+ /*
+ * This snapshot is filtered out. Don't send it, and don't
+ * set prevsnap_obj, so it will be as if this snapshot didn't
+ * exist, and the next accepted snapshot will be sent as
+ * an incremental from the last accepted one, or as the
+ * first (and full) snapshot in the case of a replication,
+ * non-incremental send.
+ */
+ zfs_close(zhp);
+ return (0);
+ }
+
+ err = hold_for_send(zhp, sdd);
+ if (err) {
+ if (err == ENOENT)
+ err = 0;
+ zfs_close(zhp);
+ return (err);
+ }
+
/* send it */
if (sdd->verbose) {
(void) fprintf(stderr, "sending from @%s to %s\n",
- sdd->lastsnap, zhp->zfs_name);
+ sdd->prevsnap, zhp->zfs_name);
}
- err = dump_ioctl(zhp, sdd->lastsnap,
- sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
- sdd->outfd);
+ err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
+ sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
+ sdd->outfd, sdd->debugnv);
- if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
- sdd->seento = B_TRUE;
-
- (void) strcpy(sdd->lastsnap, thissnap);
+ (void) strcpy(sdd->prevsnap, thissnap);
+ sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
return (err);
}
@@ -584,51 +1126,33 @@ dump_filesystem(zfs_handle_t *zhp, void *arg)
}
}
- if (sdd->doall) {
- sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
- if (sdd->fromsnap == NULL || missingfrom)
- sdd->seenfrom = B_TRUE;
+ sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
+ sdd->prevsnap_obj = 0;
+ if (sdd->fromsnap == NULL || missingfrom)
+ sdd->seenfrom = B_TRUE;
- rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
- if (!sdd->seenfrom) {
+ rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
+ if (!sdd->seenfrom) {
+ (void) fprintf(stderr,
+ "WARNING: could not send %s@%s:\n"
+ "incremental source (%s@%s) does not exist\n",
+ zhp->zfs_name, sdd->tosnap,
+ zhp->zfs_name, sdd->fromsnap);
+ sdd->err = B_TRUE;
+ } else if (!sdd->seento) {
+ if (sdd->fromsnap) {
(void) fprintf(stderr,
"WARNING: could not send %s@%s:\n"
- "incremental source (%s@%s) does not exist\n",
+ "incremental source (%s@%s) "
+ "is not earlier than it\n",
zhp->zfs_name, sdd->tosnap,
zhp->zfs_name, sdd->fromsnap);
- sdd->err = B_TRUE;
- } else if (!sdd->seento) {
- if (sdd->fromsnap) {
- (void) fprintf(stderr,
- "WARNING: could not send %s@%s:\n"
- "incremental source (%s@%s) "
- "is not earlier than it\n",
- zhp->zfs_name, sdd->tosnap,
- zhp->zfs_name, sdd->fromsnap);
- } else {
- (void) fprintf(stderr, "WARNING: "
- "could not send %s@%s: does not exist\n",
- zhp->zfs_name, sdd->tosnap);
- }
- sdd->err = B_TRUE;
- }
- } else {
- zfs_handle_t *snapzhp;
- char snapname[ZFS_MAXNAMELEN];
-
- (void) snprintf(snapname, sizeof (snapname), "%s@%s",
- zfs_get_name(zhp), sdd->tosnap);
- snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
- if (snapzhp == NULL) {
- rv = -1;
} else {
- rv = dump_ioctl(snapzhp,
- missingfrom ? NULL : sdd->fromsnap,
- sdd->fromorigin || missingfrom,
- sdd->outfd);
- sdd->seento = B_TRUE;
- zfs_close(snapzhp);
+ (void) fprintf(stderr, "WARNING: "
+ "could not send %s@%s: does not exist\n",
+ zhp->zfs_name, sdd->tosnap);
}
+ sdd->err = B_TRUE;
}
return (rv);
@@ -644,6 +1168,29 @@ dump_filesystems(zfs_handle_t *rzhp, void *arg)
if (!sdd->replicate)
return (dump_filesystem(rzhp, sdd));
+ /* Mark the clone origin snapshots. */
+ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
+ fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
+ nvlist_t *nvfs;
+ uint64_t origin_guid = 0;
+
+ VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
+ (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
+ if (origin_guid != 0) {
+ char *snapname;
+ nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
+ origin_guid, &snapname);
+ if (origin_nv != NULL) {
+ nvlist_t *snapprops;
+ VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
+ "snapprops", &snapprops));
+ VERIFY(0 == nvlist_lookup_nvlist(snapprops,
+ snapname, &snapprops));
+ VERIFY(0 == nvlist_add_boolean(
+ snapprops, "is_clone_origin"));
+ }
+ }
+ }
again:
needagain = progress = B_FALSE;
for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
@@ -653,7 +1200,6 @@ again:
zfs_handle_t *zhp;
int err;
uint64_t origin_guid = 0;
- nvlist_t *origin_nv;
VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
if (nvlist_lookup_boolean(fslist, "sent") == 0)
@@ -662,15 +1208,19 @@ again:
VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
- origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
- if (origin_nv &&
- nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
- /*
- * origin has not been sent yet;
- * skip this clone.
- */
- needagain = B_TRUE;
- continue;
+ if (origin_guid != 0) {
+ nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
+ origin_guid, NULL);
+ if (origin_nv != NULL &&
+ nvlist_lookup_boolean(origin_nv,
+ "sent") == ENOENT) {
+ /*
+ * origin has not been sent yet;
+ * skip this clone.
+ */
+ needagain = B_TRUE;
+ continue;
+ }
}
zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
@@ -691,20 +1241,38 @@ again:
}
/*
- * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
- * If 'doall', dump all intermediate snaps.
- * If 'replicate', dump special header and do recursively.
+ * Generate a send stream for the dataset identified by the argument zhp.
+ *
+ * The content of the send stream is the snapshot identified by
+ * 'tosnap'. Incremental streams are requested in two ways:
+ * - from the snapshot identified by "fromsnap" (if non-null) or
+ * - from the origin of the dataset identified by zhp, which must
+ * be a clone. In this case, "fromsnap" is null and "fromorigin"
+ * is TRUE.
+ *
+ * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
+ * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
+ * if "replicate" is set. If "doall" is set, dump all the intermediate
+ * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
+ * case too. If "props" is set, send properties.
*/
int
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
- boolean_t replicate, boolean_t doall, boolean_t fromorigin,
- boolean_t verbose, int outfd)
+ sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
+ void *cb_arg, nvlist_t **debugnvp)
{
char errbuf[1024];
send_dump_data_t sdd = { 0 };
int err;
nvlist_t *fss = NULL;
avl_tree_t *fsavl = NULL;
+ static uint64_t holdseq;
+ int spa_version;
+ boolean_t holdsnaps = B_FALSE;
+ pthread_t tid;
+ int pipefd[2];
+ dedup_arg_t dda = { 0 };
+ int featureflags = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot send '%s'"), zhp->zfs_name);
@@ -715,15 +1283,46 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
}
- if (replicate || doall) {
+	/*
+	 * sdd was zero-initialized, which leaves cleanup_fd == 0 (a valid
+	 * descriptor).  Mark it as unset right away: the error exits at the
+	 * end of this function close any cleanup_fd other than -1, and they
+	 * can be reached before the hold cleanup descriptor is opened.
+	 */
+	sdd.cleanup_fd = -1;
+
+	/* SA-capable filesystems may emit spill records in the stream. */
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
+		uint64_t version;
+		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+		if (version >= ZPL_VERSION_SA) {
+			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+		}
+	}
+
+ if (zfs_spa_version(zhp, &spa_version) == 0 &&
+ spa_version >= SPA_VERSION_USERREFS &&
+ (flags.doall || flags.replicate))
+ holdsnaps = B_TRUE;
+
+	if (flags.dedup) {
+		/*
+		 * Dedup setup: create a pipe and start the cksummer thread,
+		 * handing it pipefd[1] as its input descriptor and the
+		 * caller's outfd as its output descriptor.
+		 */
+		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
+		    DMU_BACKUP_FEATURE_DEDUPPROPS);
+		if ((err = pipe(pipefd)) != 0) {
+			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
+			    errbuf));
+		}
+		dda.outputfd = outfd;
+		dda.inputfd = pipefd[1];
+		dda.dedup_hdl = zhp->zfs_hdl;
+		/*
+		 * pthread_create() reports failure through its return value
+		 * and does not set errno, so the message must be built from
+		 * err rather than errno.
+		 */
+		if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
+			(void) close(pipefd[0]);
+			(void) close(pipefd[1]);
+			zfs_error_aux(zhp->zfs_hdl, strerror(err));
+			return (zfs_error(zhp->zfs_hdl,
+			    EZFS_THREADCREATEFAILED, errbuf));
+		}
+	}
+
+ if (flags.replicate || flags.doall || flags.props) {
dmu_replay_record_t drr = { 0 };
char *packbuf = NULL;
size_t buflen = 0;
zio_cksum_t zc = { 0 };
- assert(fromsnap || doall);
-
- if (replicate) {
+ if (flags.replicate || flags.props) {
nvlist_t *hdrnv;
VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
@@ -732,45 +1331,52 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
"fromsnap", fromsnap));
}
VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
+ if (!flags.replicate) {
+ VERIFY(0 == nvlist_add_boolean(hdrnv,
+ "not_recursive"));
+ }
err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
- fromsnap, tosnap, &fss, &fsavl);
+ fromsnap, tosnap, flags.replicate, &fss, &fsavl);
if (err)
- return (err);
+ goto err_out;
VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
err = nvlist_pack(hdrnv, &packbuf, &buflen,
NV_ENCODE_XDR, 0);
- nvlist_free(hdrnv);
+ if (debugnvp)
+ *debugnvp = hdrnv;
+ else
+ nvlist_free(hdrnv);
if (err) {
fsavl_destroy(fsavl);
nvlist_free(fss);
- return (zfs_standard_error(zhp->zfs_hdl,
- err, errbuf));
+ goto stderr_out;
}
}
/* write first begin record */
drr.drr_type = DRR_BEGIN;
drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
- drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
+ DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
+ DMU_COMPOUNDSTREAM);
+ DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
+ featureflags);
(void) snprintf(drr.drr_u.drr_begin.drr_toname,
sizeof (drr.drr_u.drr_begin.drr_toname),
"%s@%s", zhp->zfs_name, tosnap);
drr.drr_payloadlen = buflen;
- fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
- err = write(outfd, &drr, sizeof (drr));
+ err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
/* write header nvlist */
- if (err != -1) {
- fletcher_4_incremental_native(packbuf, buflen, &zc);
- err = write(outfd, packbuf, buflen);
+ if (err != -1 && packbuf != NULL) {
+ err = cksum_and_write(packbuf, buflen, &zc, outfd);
}
free(packbuf);
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
- return (zfs_standard_error(zhp->zfs_hdl,
- errno, errbuf));
+ err = errno;
+ goto stderr_out;
}
/* write end record */
@@ -782,8 +1388,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
- return (zfs_standard_error(zhp->zfs_hdl,
- errno, errbuf));
+ err = errno;
+ goto stderr_out;
}
}
}
@@ -791,18 +1397,47 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
- sdd.outfd = outfd;
- sdd.replicate = replicate;
- sdd.doall = doall;
- sdd.fromorigin = fromorigin;
+ if (flags.dedup)
+ sdd.outfd = pipefd[0];
+ else
+ sdd.outfd = outfd;
+ sdd.replicate = flags.replicate;
+ sdd.doall = flags.doall;
+ sdd.fromorigin = flags.fromorigin;
sdd.fss = fss;
sdd.fsavl = fsavl;
- sdd.verbose = verbose;
+ sdd.verbose = flags.verbose;
+ sdd.filter_cb = filter_func;
+ sdd.filter_cb_arg = cb_arg;
+ if (debugnvp)
+ sdd.debugnv = *debugnvp;
+	if (holdsnaps) {
+		/*
+		 * Build a hold tag from the pid and a per-process sequence
+		 * number, and open the ZFS device to obtain the descriptor
+		 * stored in cleanup_fd.
+		 */
+		++holdseq;
+		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
+		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
+		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
+		if (sdd.cleanup_fd < 0) {
+			/* save errno before the cleanup calls can change it */
+			err = errno;
+			/*
+			 * Like the other failure paths in this function,
+			 * release the gathered snapshot data here; the error
+			 * exit labels do not free it.
+			 */
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			goto stderr_out;
+		}
+	} else {
+		sdd.cleanup_fd = -1;
+	}
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
- if (replicate || doall) {
+ if (flags.dedup) {
+ (void) close(pipefd[0]);
+ (void) pthread_join(tid, NULL);
+ }
+
+ if (sdd.cleanup_fd != -1) {
+ VERIFY(0 == close(sdd.cleanup_fd));
+ sdd.cleanup_fd = -1;
+ }
+
+ if (flags.replicate || flags.doall || flags.props) {
/*
* write final end record. NB: want to do this even if
* there was some error, because it might not be totally
@@ -817,6 +1452,18 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
return (err || sdd.err);
+
+stderr_out:
+ err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
+err_out:
+ if (sdd.cleanup_fd != -1)
+ VERIFY(0 == close(sdd.cleanup_fd));
+ if (flags.dedup) {
+ (void) pthread_cancel(tid);
+ (void) pthread_join(tid, NULL);
+ (void) close(pipefd[0]);
+ }
+ return (err);
}
/*
@@ -902,11 +1549,12 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
if (err)
return (err);
+ zc.zc_objset_type = DMU_OST_ZFS;
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
if (tryname) {
(void) strcpy(newname, tryname);
- zc.zc_objset_type = DMU_OST_ZFS;
- (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
if (flags.verbose) {
@@ -961,12 +1609,18 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
int err = 0;
prop_changelist_t *clp;
zfs_handle_t *zhp;
+ boolean_t defer = B_FALSE;
+ int spa_version;
zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
if (zhp == NULL)
return (-1);
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
flags.force ? MS_FORCE : 0);
+ if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+ zfs_spa_version(zhp, &spa_version) == 0 &&
+ spa_version >= SPA_VERSION_USERREFS)
+ defer = B_TRUE;
zfs_close(zhp);
if (clp == NULL)
return (-1);
@@ -975,12 +1629,12 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
return (err);
zc.zc_objset_type = DMU_OST_ZFS;
+ zc.zc_defer_destroy = defer;
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
if (flags.verbose)
(void) printf("attempting destroy %s\n", zc.zc_name);
err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
-
if (err == 0) {
if (flags.verbose)
(void) printf("success\n");
@@ -990,8 +1644,14 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
(void) changelist_postfix(clp);
changelist_free(clp);
- if (err != 0)
+ /*
+ * Deferred destroy might destroy the snapshot or only mark it to be
+ * destroyed later, and it returns success in either case.
+ */
+ if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
+ ZFS_TYPE_SNAPSHOT))) {
err = recv_rename(hdl, name, NULL, baselen, newname, flags);
+ }
return (err);
}
@@ -1009,6 +1669,7 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg)
if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
(void) strcpy(gtnd->name, zhp->zfs_name);
+ zfs_close(zhp);
return (EEXIST);
}
err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
@@ -1099,19 +1760,22 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
static int
recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
- recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
+ recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
+ nvlist_t *renamed)
{
nvlist_t *local_nv;
avl_tree_t *local_avl;
nvpair_t *fselem, *nextfselem;
- char *tosnap, *fromsnap;
+ char *fromsnap;
char newname[ZFS_MAXNAMELEN];
int error;
- boolean_t needagain, progress;
+ boolean_t needagain, progress, recursive;
char *s1, *s2;
VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
- VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
+
+ recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+ ENOENT);
if (flags.dryrun)
return (0);
@@ -1120,7 +1784,7 @@ again:
needagain = progress = B_FALSE;
if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
- &local_nv, &local_avl)) != 0)
+ recursive, &local_nv, &local_avl)) != 0)
return (error);
/*
@@ -1135,7 +1799,7 @@ again:
uint64_t originguid = 0;
uint64_t stream_originguid = 0;
uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
- char *fsname, *stream_fsname, *p1, *p2;
+ char *fsname, *stream_fsname;
nextfselem = nvlist_next_nvpair(local_nv, fselem);
@@ -1243,7 +1907,7 @@ again:
stream_snapname, &props)) {
zfs_cmd_t zc = { 0 };
- zc.zc_cookie = B_TRUE; /* clear current props */
+ zc.zc_cookie = B_TRUE; /* received */
(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
"%s@%s", fsname, nvpair_name(snapelem));
if (zcmd_write_src_nvlist(hdl, &zc,
@@ -1291,10 +1955,13 @@ again:
continue;
}
- if (fromguid == 0 && flags.verbose) {
- (void) printf("local fs %s does not have fromsnap "
- "(%s in stream); must have been deleted locally; "
- "ignoring\n", fsname, fromsnap);
+ if (fromguid == 0) {
+ if (flags.verbose) {
+ (void) printf("local fs %s does not have "
+ "fromsnap (%s in stream); must have "
+ "been deleted locally; ignoring\n",
+ fsname, fromsnap);
+ }
continue;
}
@@ -1306,10 +1973,16 @@ again:
s1 = strrchr(fsname, '/');
s2 = strrchr(stream_fsname, '/');
- /* check for rename */
+ /*
+ * Check for rename. If the exact receive path is specified, it
+ * does not count as a rename, but we still need to check the
+ * datasets beneath it.
+ */
if ((stream_parent_fromsnap_guid != 0 &&
+ parent_fromsnap_guid != 0 &&
stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
- ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
+ ((flags.isprefix || strcmp(tofs, fsname) != 0) &&
+ (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
nvlist_t *parent;
char tryname[ZFS_MAXNAMELEN];
@@ -1328,7 +2001,7 @@ again:
VERIFY(0 == nvlist_lookup_string(parent, "name",
&pname));
(void) snprintf(tryname, sizeof (tryname),
- "%s%s", pname, p2 != NULL ? p2 : "");
+ "%s%s", pname, strrchr(stream_fsname, '/'));
} else {
tryname[0] = '\0';
if (flags.verbose) {
@@ -1337,8 +2010,16 @@ again:
}
}
+ newname[0] = '\0';
+
error = recv_rename(hdl, fsname, tryname,
strlen(tofs)+1, newname, flags);
+
+ if (renamed != NULL && newname[0] != '\0') {
+ VERIFY(0 == nvlist_add_boolean(renamed,
+ newname));
+ }
+
if (error)
needagain = B_TRUE;
else
@@ -1362,42 +2043,33 @@ again:
static int
zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
- char **top_zfs)
+ char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
{
nvlist_t *stream_nv = NULL;
avl_tree_t *stream_avl = NULL;
char *fromsnap = NULL;
+ char *cp;
char tofs[ZFS_MAXNAMELEN];
+ char sendfs[ZFS_MAXNAMELEN];
char errbuf[1024];
dmu_replay_record_t drre;
int error;
boolean_t anyerr = B_FALSE;
boolean_t softerr = B_FALSE;
+ boolean_t recursive;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
- if (strchr(destname, '@')) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "can not specify snapshot name for multi-snapshot stream"));
- return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
- }
-
assert(drr->drr_type == DRR_BEGIN);
assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
- assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
+ assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM);
/*
* Read in the nvlist from the stream.
*/
if (drr->drr_payloadlen != 0) {
- if (!flags.isprefix) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "must use -d to receive replication "
- "(send -R) stream"));
- return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
- }
-
error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
&stream_nv, flags.byteswap, zc);
if (error) {
@@ -1406,6 +2078,16 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
}
}
+ recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+ ENOENT);
+
+ if (recursive && strchr(destname, '@')) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot specify snapshot name for multi-snapshot stream"));
+ error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+ goto out;
+ }
+
/*
* Read in the end record and verify checksum.
*/
@@ -1449,21 +2131,73 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
}
if (fromsnap != NULL) {
+ nvlist_t *renamed = NULL;
+ nvpair_t *pair = NULL;
+
(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
if (flags.isprefix) {
- int i = strcspn(drr->drr_u.drr_begin.drr_toname,
- "/@");
+ struct drr_begin *drrb = &drr->drr_u.drr_begin;
+ int i;
+
+ if (flags.istail) {
+ cp = strrchr(drrb->drr_toname, '/');
+ if (cp == NULL) {
+ (void) strlcat(tofs, "/",
+ ZFS_MAXNAMELEN);
+ i = 0;
+ } else {
+ i = (cp - drrb->drr_toname);
+ }
+ } else {
+ i = strcspn(drrb->drr_toname, "/@");
+ }
/* zfs_receive_one() will create_parents() */
- (void) strlcat(tofs,
- &drr->drr_u.drr_begin.drr_toname[i],
+ (void) strlcat(tofs, &drrb->drr_toname[i],
ZFS_MAXNAMELEN);
*strchr(tofs, '@') = '\0';
}
- softerr = recv_incremental_replication(hdl, tofs,
- flags, stream_nv, stream_avl);
+
+ if (recursive && !flags.dryrun && !flags.nomount) {
+ VERIFY(0 == nvlist_alloc(&renamed,
+ NV_UNIQUE_NAME, 0));
+ }
+
+ softerr = recv_incremental_replication(hdl, tofs, flags,
+ stream_nv, stream_avl, renamed);
+
+ /* Unmount renamed filesystems before receiving. */
+ while ((pair = nvlist_next_nvpair(renamed,
+ pair)) != NULL) {
+ zfs_handle_t *zhp;
+ prop_changelist_t *clp = NULL;
+
+ zhp = zfs_open(hdl, nvpair_name(pair),
+ ZFS_TYPE_FILESYSTEM);
+ if (zhp != NULL) {
+ clp = changelist_gather(zhp,
+ ZFS_PROP_MOUNTPOINT, 0, 0);
+ zfs_close(zhp);
+ if (clp != NULL) {
+ softerr |=
+ changelist_prefix(clp);
+ changelist_free(clp);
+ }
+ }
+ }
+
+ nvlist_free(renamed);
}
}
+ /*
+ * Get the fs specified by the first path in the stream (the top level
+ * specified by 'zfs send') and pass it to each invocation of
+ * zfs_receive_one().
+ */
+ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
+ ZFS_MAXNAMELEN);
+ if ((cp = strchr(sendfs, '@')) != NULL)
+ *cp = '\0';
/* Finally, receive each contained stream */
do {
@@ -1475,7 +2209,8 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
* recv_skip() and return 0).
*/
error = zfs_receive_impl(hdl, destname, flags, fd,
- stream_avl, top_zfs);
+ sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
+ action_handlep);
if (error == ENODATA) {
error = 0;
break;
@@ -1489,7 +2224,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
* renames again.
*/
softerr = recv_incremental_replication(hdl, tofs, flags,
- stream_nv, stream_avl);
+ stream_nv, stream_avl, NULL);
}
out:
@@ -1503,11 +2238,28 @@ out:
return (error);
}
+/*
+ * Report on stderr how many additional property errors were left out of
+ * the error list.  'truncated' is the number of omitted errors and must
+ * be non-zero; separate messages are used for the singular and plural
+ * forms.
+ */
+static void
+trunc_prop_errs(int truncated)
+{
+	ASSERT(truncated != 0);
+
+	if (truncated != 1) {
+		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+		    "%d more properties could not be set\n"), truncated);
+		return;
+	}
+
+	(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+	    "1 more property could not be set\n"));
+}
+
static int
recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
{
dmu_replay_record_t *drr;
void *buf = malloc(1<<20);
+ char errbuf[1024];
+
+ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+ "cannot receive:"));
/* XXX would be great to use lseek if possible... */
drr = buf;
@@ -1520,7 +2272,11 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
switch (drr->drr_type) {
case DRR_BEGIN:
/* NB: not to be used on v2 stream packages */
- assert(drr->drr_payloadlen == 0);
+			if (drr->drr_payloadlen != 0) {
+				/*
+				 * A substream header must not carry a
+				 * payload.  Release the scratch buffer
+				 * before bailing out so this early return
+				 * does not leak it.
+				 */
+				free(buf);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid substream header"));
+				return (zfs_error(hdl, EZFS_BADSTREAM,
+				    errbuf));
+			}
break;
case DRR_END:
@@ -1546,13 +2302,23 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
(void) recv_read(hdl, fd, buf,
drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
break;
-
+		case DRR_SPILL:
+			if (byteswap) {
+				/*
+				 * Store the swapped value back into the
+				 * spill record's own length field; that is
+				 * the field used for the read below.
+				 */
+				drr->drr_u.drr_spill.drr_length =
+				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
+			}
+			/* consume and discard the spill payload */
+			(void) recv_read(hdl, fd, buf,
+			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
+			break;
+ case DRR_WRITE_BYREF:
case DRR_FREEOBJECTS:
case DRR_FREE:
break;
default:
- assert(!"invalid record type");
+			/*
+			 * Unknown record type: release the scratch buffer
+			 * before returning so this early exit does not leak
+			 * it.
+			 */
+			free(buf);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid record type"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
}
@@ -1566,27 +2332,34 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
static int
zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
recvflags_t flags, dmu_replay_record_t *drr,
- dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
- char **top_zfs)
+ dmu_replay_record_t *drr_noswap, const char *sendfs,
+ nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+ uint64_t *action_handlep)
{
zfs_cmd_t zc = { 0 };
time_t begin_time;
- int ioctl_err, ioctl_errno, err, choplen;
+ int ioctl_err, ioctl_errno, err;
char *cp;
struct drr_begin *drrb = &drr->drr_u.drr_begin;
char errbuf[1024];
- char chopprefix[ZFS_MAXNAMELEN];
+ char prop_errbuf[1024];
+ const char *chopprefix;
boolean_t newfs = B_FALSE;
boolean_t stream_wantsnewfs;
uint64_t parent_snapguid = 0;
prop_changelist_t *clp = NULL;
nvlist_t *snapprops_nvlist = NULL;
+ zprop_errflags_t prop_errflags;
+ boolean_t recursive;
begin_time = time(NULL);
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
+ recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
+ ENOENT);
+
if (stream_avl != NULL) {
char *snapname;
nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
@@ -1617,6 +2390,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
return (-1);
}
+ cp = NULL;
+
/*
* Determine how much of the snapshot name stored in the stream
* we are going to tack on to the name they specified on the
@@ -1625,38 +2400,77 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* If they specified a snapshot, chop the entire name stored in
* the stream.
*/
- (void) strcpy(chopprefix, drrb->drr_toname);
- if (flags.isprefix) {
+ if (flags.istail) {
+ /*
+ * A filesystem was specified with -e. We want to tack on only
+ * the tail of the sent snapshot path.
+ */
+ if (strchr(tosnap, '@')) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+ "argument - snapshot not allowed with -e"));
+ return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+ }
+
+ chopprefix = strrchr(sendfs, '/');
+
+ if (chopprefix == NULL) {
+ /*
+ * The tail is the poolname, so we need to
+ * prepend a path separator.
+ */
+ int len = strlen(drrb->drr_toname);
+ cp = malloc(len + 2);
+ cp[0] = '/';
+ (void) strcpy(&cp[1], drrb->drr_toname);
+ chopprefix = cp;
+ } else {
+ chopprefix = drrb->drr_toname + (chopprefix - sendfs);
+ }
+ } else if (flags.isprefix) {
/*
- * They specified a fs with -d, we want to tack on
- * everything but the pool name stored in the stream
+ * A filesystem was specified with -d. We want to tack on
+ * everything but the first element of the sent snapshot path
+ * (all but the pool name).
*/
if (strchr(tosnap, '@')) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
"argument - snapshot not allowed with -d"));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
- cp = strchr(chopprefix, '/');
- if (cp == NULL)
- cp = strchr(chopprefix, '@');
- *cp = '\0';
+
+ chopprefix = strchr(drrb->drr_toname, '/');
+ if (chopprefix == NULL)
+ chopprefix = strchr(drrb->drr_toname, '@');
} else if (strchr(tosnap, '@') == NULL) {
/*
- * If they specified a filesystem without -d, we want to
- * tack on everything after the fs specified in the
- * first name from the stream.
+ * If a filesystem was specified without -d or -e, we want to
+ * tack on everything after the fs specified by 'zfs send'.
*/
- cp = strchr(chopprefix, '@');
- *cp = '\0';
+ chopprefix = drrb->drr_toname + strlen(sendfs);
+ } else {
+ /* A snapshot was specified as an exact path (no -d or -e). */
+ if (recursive) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot specify snapshot name for multi-snapshot "
+ "stream"));
+ return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+ }
+ chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
}
- choplen = strlen(chopprefix);
+
+ ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
+ ASSERT(chopprefix > drrb->drr_toname);
+ ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
+ ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
+ chopprefix[0] == '\0');
/*
* Determine name of destination snapshot, store in zc_value.
*/
+ (void) strcpy(zc.zc_top_ds, tosnap);
(void) strcpy(zc.zc_value, tosnap);
- (void) strncat(zc.zc_value, drrb->drr_toname+choplen,
- sizeof (zc.zc_value));
+	/*
+	 * Append the part of the stream's snapshot name selected above.
+	 * strlcat() takes the total size of the destination buffer, whereas
+	 * strncat() would treat that size as the number of bytes it may
+	 * append and could write past the end of zc_value.
+	 */
+	(void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
+ free(cp);
if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
zcmd_free_nvlists(&zc);
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
@@ -1714,7 +2528,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
(void) strcpy(zc.zc_name, zc.zc_value);
*strchr(zc.zc_name, '@') = '\0';
- if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+ /*
+ * If the exact receive path was specified and this is the
+ * topmost path in the stream, then if the fs does not exist we
+ * should look no further.
+ */
+ if ((flags.isprefix || (*(chopprefix = drrb->drr_toname +
+ strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
+ !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
char snap[ZFS_MAXNAMELEN];
(void) strcpy(snap, strchr(zc.zc_value, '@'));
if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
@@ -1730,6 +2551,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
zfs_handle_t *zhp;
+
/*
* Destination fs exists. Therefore this should either
* be an incremental, or the stream specifies a new fs
@@ -1737,7 +2559,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* away (and have therefore specified -F and removed any
* snapshots).
*/
-
if (stream_wantsnewfs) {
if (!flags.force) {
zcmd_free_nvlists(&zc);
@@ -1780,21 +2601,17 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
/* We can't do online recv in this case */
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
if (clp == NULL) {
+ zfs_close(zhp);
zcmd_free_nvlists(&zc);
return (-1);
}
if (changelist_prefix(clp) != 0) {
changelist_free(clp);
+ zfs_close(zhp);
zcmd_free_nvlists(&zc);
return (-1);
}
}
- if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
- zvol_remove_link(hdl, zhp->zfs_name) != 0) {
- zfs_close(zhp);
- zcmd_free_nvlists(&zc);
- return (-1);
- }
zfs_close(zhp);
} else {
/*
@@ -1818,7 +2635,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
*/
*cp = '\0';
- if (flags.isprefix && !flags.dryrun &&
+ if (flags.isprefix && !flags.istail && !flags.dryrun &&
create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
zcmd_free_nvlists(&zc);
return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
@@ -1843,21 +2660,61 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
return (recv_skip(hdl, infd, flags.byteswap));
}
+ zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
+ zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
+ zc.zc_cleanup_fd = cleanup_fd;
+ zc.zc_action_handle = *action_handlep;
+
err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
ioctl_errno = errno;
+ prop_errflags = (zprop_errflags_t)zc.zc_obj;
+
+ if (err == 0) {
+ nvlist_t *prop_errors;
+ VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
+ zc.zc_nvlist_dst_size, &prop_errors, 0));
+
+ nvpair_t *prop_err = NULL;
+
+ while ((prop_err = nvlist_next_nvpair(prop_errors,
+ prop_err)) != NULL) {
+ char tbuf[1024];
+ zfs_prop_t prop;
+ int intval;
+
+ prop = zfs_name_to_prop(nvpair_name(prop_err));
+ (void) nvpair_value_int32(prop_err, &intval);
+ if (strcmp(nvpair_name(prop_err),
+ ZPROP_N_MORE_ERRORS) == 0) {
+ trunc_prop_errs(intval);
+ break;
+ } else {
+ (void) snprintf(tbuf, sizeof (tbuf),
+ dgettext(TEXT_DOMAIN,
+ "cannot receive %s property on %s"),
+ nvpair_name(prop_err), zc.zc_name);
+ zfs_setprop_error(hdl, prop, intval, tbuf);
+ }
+ }
+ nvlist_free(prop_errors);
+ }
+
+ zc.zc_nvlist_dst = 0;
+ zc.zc_nvlist_dst_size = 0;
zcmd_free_nvlists(&zc);
if (err == 0 && snapprops_nvlist) {
zfs_cmd_t zc2 = { 0 };
(void) strcpy(zc2.zc_name, zc.zc_value);
+ zc2.zc_cookie = B_TRUE; /* received */
if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
zcmd_free_nvlists(&zc2);
}
}
- if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
+ if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
/*
* It may be that this snapshot already exists,
* in which case we want to consume & ignore it
@@ -1865,7 +2722,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
*/
avl_tree_t *local_avl;
nvlist_t *local_nv, *fs;
- char *cp = strchr(zc.zc_value, '@');
+ cp = strchr(zc.zc_value, '@');
/*
* XXX Do this faster by just iterating over snaps in
@@ -1873,7 +2730,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* get a strange "does not exist" error message.
*/
*cp = '\0';
- if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
+ if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
&local_nv, &local_avl) == 0) {
*cp = '@';
fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
@@ -1885,14 +2742,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
(void) printf("snap %s already exists; "
"ignoring\n", zc.zc_value);
}
- ioctl_err = recv_skip(hdl, infd,
+ err = ioctl_err = recv_skip(hdl, infd,
flags.byteswap);
}
}
*cp = '@';
}
-
if (ioctl_err != 0) {
switch (ioctl_errno) {
case ENODEV:
@@ -1931,17 +2787,25 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
"invalid stream (checksum mismatch)"));
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded to receive this stream."));
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EDQUOT:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "destination %s space quota exceeded"), zc.zc_name);
+ (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+ break;
default:
(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
}
}
/*
- * Mount or recreate the /dev links for the target filesystem
- * (if created, or if we tore them down to do an incremental
- * restore), and the /dev links for the new snapshot (if
- * created). Also mount any children of the target filesystem
- * if we did an incremental receive.
+ * Mount the target filesystem (if created). Also mount any
+ * children of the target filesystem if we did a replication
+ * receive (indicated by stream_avl being non-NULL).
*/
cp = strchr(zc.zc_value, '@');
if (cp && (ioctl_err == 0 || !newfs)) {
@@ -1953,11 +2817,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
if (h != NULL) {
if (h->zfs_type == ZFS_TYPE_VOLUME) {
*cp = '@';
- err = zvol_create_link(hdl, h->zfs_name);
- if (err == 0 && ioctl_err == 0)
- err = zvol_create_link(hdl,
- zc.zc_value);
- } else if (newfs) {
+ } else if (newfs || stream_avl) {
/*
* Track the first/top of hierarchy fs,
* for mounting and sharing later.
@@ -1975,9 +2835,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
changelist_free(clp);
}
+ if (prop_errflags & ZPROP_ERR_NOCLEAR) {
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
+ "failed to clear unreceived properties on %s"),
+ zc.zc_name);
+ (void) fprintf(stderr, "\n");
+ }
+ if (prop_errflags & ZPROP_ERR_NORESTORE) {
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
+ "failed to restore original properties on %s"),
+ zc.zc_name);
+ (void) fprintf(stderr, "\n");
+ }
+
if (err || ioctl_err)
return (-1);
+ *action_handlep = zc.zc_action_handle;
+
if (flags.verbose) {
char buf1[64];
char buf2[64];
@@ -1997,13 +2872,16 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
static int
zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
- int infd, avl_tree_t *stream_avl, char **top_zfs)
+ int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
+ char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
{
int err;
dmu_replay_record_t drr, drr_noswap;
struct drr_begin *drrb = &drr.drr_u.drr_begin;
char errbuf[1024];
zio_cksum_t zcksum = { 0 };
+ uint64_t featureflags;
+ int hdrtype;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
@@ -2041,7 +2919,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
drr.drr_type = BSWAP_32(drr.drr_type);
drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_version = BSWAP_64(drrb->drr_version);
+ drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
drrb->drr_type = BSWAP_32(drrb->drr_type);
drrb->drr_flags = BSWAP_32(drrb->drr_flags);
@@ -2055,23 +2933,45 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
+ featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+ hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
+
+ if (!DMU_STREAM_SUPPORTED(featureflags) ||
+ (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "stream has unsupported feature, feature flags = %lx"),
+ featureflags);
+ return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+ }
+
if (strchr(drrb->drr_toname, '@') == NULL) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
"stream (bad snapshot name)"));
return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
}
- if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
+ if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
+ char nonpackage_sendfs[ZFS_MAXNAMELEN];
+ if (sendfs == NULL) {
+ /*
+ * We were not called from zfs_receive_package(). Get
+ * the fs specified by 'zfs send'.
+ */
+ char *cp;
+ (void) strlcpy(nonpackage_sendfs,
+ drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
+ if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
+ *cp = '\0';
+ sendfs = nonpackage_sendfs;
+ }
return (zfs_receive_one(hdl, infd, tosnap, flags,
- &drr, &drr_noswap, stream_avl, top_zfs));
- } else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
- return (zfs_receive_package(hdl, infd, tosnap, flags,
- &drr, &zcksum, top_zfs));
+ &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
+ top_zfs, cleanup_fd, action_handlep));
} else {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "stream is unsupported version %llu"),
- drrb->drr_version);
- return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+ assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM);
+ return (zfs_receive_package(hdl, infd, tosnap, flags,
+ &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
}
}
@@ -2087,8 +2987,16 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
{
char *top_zfs = NULL;
int err;
+ int cleanup_fd;
+ uint64_t action_handle = 0;
+
+ cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
+ VERIFY(cleanup_fd >= 0);
+
+ err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
+ stream_avl, &top_zfs, cleanup_fd, &action_handle);
- err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
+ VERIFY(0 == close(cleanup_fd));
if (err == 0 && !flags.nomount && top_zfs) {
zfs_handle_t *zhp;
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
index c7eb04e..24725ec 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -104,6 +103,13 @@ vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
return (state == VDEV_STATE_OFFLINE);
}
+/*
+ * find_vdev_problem() predicate: reports whether a vdev's state is
+ * VDEV_STATE_REMOVED.  The aux and errs arguments are ignored.
+ */
+/* ARGSUSED */
+static int
+vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
+{
+	if (state != VDEV_STATE_REMOVED)
+		return (0);
+
+	return (1);
+}
+
/*
* Detect if any leaf devices that have seen errors or could not be opened.
*/
@@ -131,7 +137,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
if (find_vdev_problem(child[c], func))
return (B_TRUE);
} else {
- verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
if (func(vs->vs_state, vs->vs_aux,
@@ -166,7 +172,8 @@ check_status(nvlist_t *config, boolean_t isimport)
{
nvlist_t *nvroot;
vdev_stat_t *vs;
- uint_t vsc;
+ pool_scan_stat_t *ps = NULL;
+ uint_t vsc, psc;
uint64_t nerr;
uint64_t version;
uint64_t stateval;
@@ -177,15 +184,24 @@ check_status(nvlist_t *config, boolean_t isimport)
&version) == 0);
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) == 0);
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&stateval) == 0);
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+ /*
+ * Currently resilvering a vdev
+ */
+ (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
+ (uint64_t **)&ps, &psc);
+ if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
+ ps->pss_state == DSS_SCANNING)
+ return (ZPOOL_STATUS_RESILVERING);
/*
* Pool last accessed by another system.
*/
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
if (hostid != 0 && (unsigned long)hostid != gethostid() &&
stateval == POOL_STATE_ACTIVE)
return (ZPOOL_STATUS_HOSTID_MISMATCH);
@@ -276,10 +292,10 @@ check_status(nvlist_t *config, boolean_t isimport)
return (ZPOOL_STATUS_OFFLINE_DEV);
/*
- * Currently resilvering
+ * Removed device
*/
- if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
- return (ZPOOL_STATUS_RESILVERING);
+ if (find_vdev_problem(nvroot, vdev_removed))
+ return (ZPOOL_STATUS_REMOVED_DEV);
/*
* Outdated, but usable, version
@@ -315,3 +331,68 @@ zpool_import_status(nvlist_t *config, char **msgid)
return (ret);
}
+
+/*
+ * Print one row of DDT statistics.  'h' selects the row label: -1 prints
+ * "Total", any other value prints 2^h.  Nothing is printed when 'dds' is
+ * NULL or reports zero blocks.
+ */
+static void
+dump_ddt_stat(const ddt_stat_t *dds, int h)
+{
+	char label[6];
+	char alloc_blocks[6], alloc_lsize[6], alloc_psize[6], alloc_dsize[6];
+	char refd_blocks[6], refd_lsize[6], refd_psize[6], refd_dsize[6];
+
+	if (dds == NULL)
+		return;
+	if (dds->dds_blocks == 0)
+		return;
+
+	/* Row label. */
+	if (h != -1)
+		zfs_nicenum(1ULL << h, label, sizeof (label));
+	else
+		(void) strcpy(label, "Total");
+
+	/* "allocated" columns. */
+	zfs_nicenum(dds->dds_blocks, alloc_blocks, sizeof (alloc_blocks));
+	zfs_nicenum(dds->dds_lsize, alloc_lsize, sizeof (alloc_lsize));
+	zfs_nicenum(dds->dds_psize, alloc_psize, sizeof (alloc_psize));
+	zfs_nicenum(dds->dds_dsize, alloc_dsize, sizeof (alloc_dsize));
+
+	/* "referenced" columns. */
+	zfs_nicenum(dds->dds_ref_blocks, refd_blocks, sizeof (refd_blocks));
+	zfs_nicenum(dds->dds_ref_lsize, refd_lsize, sizeof (refd_lsize));
+	zfs_nicenum(dds->dds_ref_psize, refd_psize, sizeof (refd_psize));
+	zfs_nicenum(dds->dds_ref_dsize, refd_dsize, sizeof (refd_dsize));
+
+	(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+	    label,
+	    alloc_blocks, alloc_lsize, alloc_psize, alloc_dsize,
+	    refd_blocks, refd_lsize, refd_psize, refd_dsize);
+}
+
+/*
+ * Print the DDT histogram and the column totals.
+ *
+ * Writes the banner and column headings, then one row per entry of
+ * ddh->ddh_stat[] (indices 0..63), then a "Total" row from dds_total.
+ */
+void
+zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
+{
+	const ddt_stat_t *bucket;
+	int h = 0;
+
+	/* Leading blank line followed by the two banner lines. */
+	(void) printf("\n"
+	    "bucket "
+	    " allocated "
+	    " referenced \n"
+	    "______ "
+	    "______________________________ "
+	    "______________________________\n");
+
+	/* Column names. */
+	(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+	    "refcnt",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE");
+
+	/* Underline for the column names. */
+	(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+	    "------",
+	    "------", "-----", "-----", "-----",
+	    "------", "-----", "-----", "-----");
+
+	while (h < 64) {
+		bucket = &ddh->ddh_stat[h];
+		dump_ddt_stat(bucket, h);
+		h++;
+	}
+
+	/* -1 asks dump_ddt_stat() for the "Total" label. */
+	dump_ddt_stat(dds_total, -1);
+
+	(void) printf("\n");
+}
diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
index ddd8374..01738fb 100644
--- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
+++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c
@@ -19,14 +19,18 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Internal utility routines for the ZFS library.
*/
+#include <sys/param.h>
+#include <sys/linker.h>
+#include <sys/module.h>
+#include <sys/stat.h>
+
#include <errno.h>
#include <fcntl.h>
#include <libintl.h>
@@ -46,6 +50,8 @@
#include "libzfs_impl.h"
#include "zfs_prop.h"
+int aok;
+
int
libzfs_errno(libzfs_handle_t *hdl)
{
@@ -70,7 +76,7 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_BADPROP:
return (dgettext(TEXT_DOMAIN, "invalid property value"));
case EZFS_PROPREADONLY:
- return (dgettext(TEXT_DOMAIN, "read only property"));
+ return (dgettext(TEXT_DOMAIN, "read-only property"));
case EZFS_PROPTYPE:
return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
"datasets of this type"));
@@ -90,12 +96,10 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_BADSTREAM:
return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
case EZFS_DSREADONLY:
- return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+ return (dgettext(TEXT_DOMAIN, "dataset is read-only"));
case EZFS_VOLTOOBIG:
return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
"this system"));
- case EZFS_VOLHASDATA:
- return (dgettext(TEXT_DOMAIN, "volume has data"));
case EZFS_INVALIDNAME:
return (dgettext(TEXT_DOMAIN, "invalid name"));
case EZFS_BADRESTORE:
@@ -138,16 +142,12 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
case EZFS_SHARESMBFAILED:
return (dgettext(TEXT_DOMAIN, "smb add share failed"));
- case EZFS_ISCSISVCUNAVAIL:
- return (dgettext(TEXT_DOMAIN,
- "iscsitgt service need to be enabled by "
- "a privileged user"));
- case EZFS_DEVLINKS:
- return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
case EZFS_PERM:
return (dgettext(TEXT_DOMAIN, "permission denied"));
case EZFS_NOSPC:
return (dgettext(TEXT_DOMAIN, "out of space"));
+ case EZFS_FAULT:
+ return (dgettext(TEXT_DOMAIN, "bad address"));
case EZFS_IO:
return (dgettext(TEXT_DOMAIN, "I/O error"));
case EZFS_INTR:
@@ -161,12 +161,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
case EZFS_NOHISTORY:
return (dgettext(TEXT_DOMAIN, "no history available"));
- case EZFS_UNSHAREISCSIFAILED:
- return (dgettext(TEXT_DOMAIN,
- "iscsitgtd failed request to unshare"));
- case EZFS_SHAREISCSIFAILED:
- return (dgettext(TEXT_DOMAIN,
- "iscsitgtd failed request to share"));
case EZFS_POOLPROPS:
return (dgettext(TEXT_DOMAIN, "failed to retrieve "
"pool properties"));
@@ -194,9 +188,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_NODELEGATION:
return (dgettext(TEXT_DOMAIN, "delegated administration is "
"disabled on pool"));
- case EZFS_PERMRDONLY:
- return (dgettext(TEXT_DOMAIN, "snapshot permissions cannot be"
- " modified"));
case EZFS_BADCACHE:
return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
case EZFS_ISL2CACHE:
@@ -213,6 +204,31 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_UNPLAYED_LOGS:
return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
"logs"));
+ case EZFS_REFTAG_RELE:
+ return (dgettext(TEXT_DOMAIN, "no such tag on this dataset"));
+ case EZFS_REFTAG_HOLD:
+ return (dgettext(TEXT_DOMAIN, "tag already exists on this "
+ "dataset"));
+ case EZFS_TAGTOOLONG:
+ return (dgettext(TEXT_DOMAIN, "tag too long"));
+ case EZFS_PIPEFAILED:
+ return (dgettext(TEXT_DOMAIN, "pipe create failed"));
+ case EZFS_THREADCREATEFAILED:
+ return (dgettext(TEXT_DOMAIN, "thread create failed"));
+ case EZFS_POSTSPLIT_ONLINE:
+ return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
+ "into a new one"));
+ case EZFS_SCRUBBING:
+ return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
+ "use 'zpool scrub -s' to cancel current scrub"));
+ case EZFS_NO_SCRUB:
+ return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
+ case EZFS_DIFF:
+ return (dgettext(TEXT_DOMAIN, "unable to generate diffs"));
+ case EZFS_DIFFDATA:
+ return (dgettext(TEXT_DOMAIN, "invalid diff data"));
+ case EZFS_POOLREADONLY:
+ return (dgettext(TEXT_DOMAIN, "pool is read-only"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@@ -301,6 +317,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
zfs_verror(hdl, EZFS_IO, fmt, ap);
return (-1);
+ case EFAULT:
+ zfs_verror(hdl, EZFS_FAULT, fmt, ap);
+ return (-1);
+
case EINTR:
zfs_verror(hdl, EZFS_INTR, fmt, ap);
return (-1);
@@ -357,9 +377,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
zfs_verror(hdl, EZFS_BUSY, fmt, ap);
break;
case EROFS:
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "snapshot permissions cannot be modified"));
- zfs_verror(hdl, EZFS_PERMRDONLY, fmt, ap);
+ zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
break;
case ENAMETOOLONG:
zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap);
@@ -373,7 +391,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
break;
default:
- zfs_error_aux(hdl, strerror(errno));
+ zfs_error_aux(hdl, strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
break;
}
@@ -445,12 +463,17 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case EDQUOT:
zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
return (-1);
+
case EAGAIN:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool I/O is currently suspended"));
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
break;
+ case EROFS:
+ zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap);
+ break;
+
default:
zfs_error_aux(hdl, strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@@ -484,6 +507,29 @@ zfs_alloc(libzfs_handle_t *hdl, size_t size)
}
/*
+ * A safe form of asprintf() which will die if the allocation fails.
+ */
+/*PRINTFLIKE2*/
+char *
+zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...)
+{
+	va_list ap;
+	char *ret = NULL;
+	int err;
+
+	/*
+	 * Format 'fmt' and its arguments into a freshly allocated string.
+	 * Returns the string, or NULL after reporting the failure through
+	 * no_memory(hdl).
+	 */
+	va_start(ap, fmt);
+	err = vasprintf(&ret, fmt, ap);
+	va_end(ap);
+
+	if (err < 0) {
+		/*
+		 * Not every libc defines the contents of 'ret' after a
+		 * failed vasprintf(); force NULL so that the caller never
+		 * receives an indeterminate pointer.
+		 */
+		ret = NULL;
+		(void) no_memory(hdl);
+	}
+
+	return (ret);
+}
+
+/*
* A safe form of realloc(), which also zeroes newly allocated space.
*/
void *
@@ -573,7 +619,7 @@ libzfs_load(void)
/* Not present in kernel, try loading it. */
if (kldload("zfs") < 0 || modfind("zfs") < 0) {
if (errno != EEXIST)
- return (error);
+ return (-1);
}
}
return (0);
@@ -584,17 +630,18 @@ libzfs_init(void)
{
libzfs_handle_t *hdl;
- if ((hdl = calloc(sizeof (libzfs_handle_t), 1)) == NULL) {
+ if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) {
+ return (NULL);
+ }
+
+ if (libzfs_load() < 0) {
+ free(hdl);
return (NULL);
}
if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
- if (libzfs_load() == 0)
- hdl->libzfs_fd = open(ZFS_DEV, O_RDWR);
- if (hdl->libzfs_fd < 0) {
- free(hdl);
- return (NULL);
- }
+ free(hdl);
+ return (NULL);
}
if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
@@ -624,6 +671,9 @@ libzfs_fini(libzfs_handle_t *hdl)
if (hdl->libzfs_log_str)
(void) free(hdl->libzfs_log_str);
zpool_free_handles(hdl);
+#ifdef sun
+ libzfs_fru_clear(hdl, B_TRUE);
+#endif
namespace_clear(hdl);
libzfs_mnttab_fini(hdl);
free(hdl);
@@ -656,7 +706,9 @@ zfs_get_pool_handle(const zfs_handle_t *zhp)
zfs_handle_t *
zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
{
- struct statfs statbuf;
+ struct stat64 statbuf;
+ struct extmnttab entry;
+ int ret;
if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) {
/*
@@ -665,18 +717,42 @@ zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype)
return (zfs_open(hdl, path, argtype));
}
- if (statfs(path, &statbuf) != 0) {
+ if (stat64(path, &statbuf) != 0) {
(void) fprintf(stderr, "%s: %s\n", path, strerror(errno));
return (NULL);
}
- if (strcmp(statbuf.f_fstypename, MNTTYPE_ZFS) != 0) {
+#ifdef sun
+ rewind(hdl->libzfs_mnttab);
+ while ((ret = getextmntent(hdl->libzfs_mnttab, &entry, 0)) == 0) {
+ if (makedevice(entry.mnt_major, entry.mnt_minor) ==
+ statbuf.st_dev) {
+ break;
+ }
+ }
+#else
+	{
+		struct statfs sfs;
+
+		/*
+		 * 'ret' is tested right after this block, so it must be
+		 * assigned on the success path as well.  Only convert 'sfs'
+		 * into 'entry' when statfs() actually filled it in.
+		 */
+		ret = statfs(path, &sfs);
+		if (ret != 0) {
+			(void) fprintf(stderr, "%s: %s\n", path,
+			    strerror(errno));
+		} else {
+			statfs2mnttab(&sfs, &entry);
+		}
+	}
+#endif /* sun */
+ if (ret != 0) {
+ return (NULL);
+ }
+
+ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
(void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"),
path);
return (NULL);
}
- return (zfs_open(hdl, statbuf.f_mntfromname, ZFS_TYPE_FILESYSTEM));
+ return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM));
}
/*
@@ -687,7 +763,7 @@ int
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
{
if (len == 0)
- len = 2048;
+ len = 16 * 1024;
zc->zc_nvlist_dst_size = len;
if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0)
@@ -813,6 +889,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
"PROPERTY"));
cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
"VALUE"));
+ cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN,
+ "RECEIVED"));
cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
"SOURCE"));
@@ -826,7 +904,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
* inheriting from the longest name. This is acceptable because in the
* majority of cases 'SOURCE' is the last column displayed, and we don't
* use the width anyway. Note that the 'VALUE' column can be oversized,
- * if the name of the property is much longer the any values we find.
+ * if the name of the property is much longer than any values we find.
*/
for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
/*
@@ -857,6 +935,11 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
+ /* 'RECEIVED' column. */
+ if (pl != cbp->cb_proplist &&
+ pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD])
+ cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width;
+
/*
* 'NAME' and 'SOURCE' columns
*/
@@ -872,7 +955,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
/*
* Now go through and print the headers.
*/
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < ZFS_GET_NCOLS; i++) {
switch (cbp->cb_columns[i]) {
case GET_COL_NAME:
title = dgettext(TEXT_DOMAIN, "NAME");
@@ -883,6 +966,9 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
case GET_COL_VALUE:
title = dgettext(TEXT_DOMAIN, "VALUE");
break;
+ case GET_COL_RECVD:
+ title = dgettext(TEXT_DOMAIN, "RECEIVED");
+ break;
case GET_COL_SOURCE:
title = dgettext(TEXT_DOMAIN, "SOURCE");
break;
@@ -891,7 +977,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
}
if (title != NULL) {
- if (i == 3 || cbp->cb_columns[i + 1] == 0)
+ if (i == (ZFS_GET_NCOLS - 1) ||
+ cbp->cb_columns[i + 1] == GET_COL_NONE)
(void) printf("%s", title);
else
(void) printf("%-*s ",
@@ -909,7 +996,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
void
zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
const char *propname, const char *value, zprop_source_t sourcetype,
- const char *source)
+ const char *source, const char *recvd_value)
{
int i;
const char *str;
@@ -924,7 +1011,7 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
if (cbp->cb_first)
zprop_print_headers(cbp, cbp->cb_type);
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < ZFS_GET_NCOLS; i++) {
switch (cbp->cb_columns[i]) {
case GET_COL_NAME:
str = name;
@@ -961,14 +1048,21 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
"inherited from %s", source);
str = buf;
break;
+ case ZPROP_SRC_RECEIVED:
+ str = "received";
+ break;
}
break;
+ case GET_COL_RECVD:
+ str = (recvd_value == NULL ? "-" : recvd_value);
+ break;
+
default:
continue;
}
- if (cbp->cb_columns[i + 1] == 0)
+ if (cbp->cb_columns[i + 1] == GET_COL_NONE)
(void) printf("%s", str);
else if (cbp->cb_scripted)
(void) printf("%s\t", str);
@@ -976,7 +1070,6 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
(void) printf("%-*s ",
cbp->cb_colwidths[cbp->cb_columns[i]],
str);
-
}
(void) printf("\n");
@@ -1038,7 +1131,7 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
return (-1);
}
- /* Rely on stroull() to process the numeric portion. */
+ /* Rely on strtoull() to process the numeric portion. */
errno = 0;
*num = strtoull(value, &end, 10);
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
index ca68ca1..2c07787 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <assert.h>
@@ -36,20 +35,24 @@
#include <sys/zfs_context.h>
#include <sys/zmod.h>
#include <sys/utsname.h>
+#include <sys/systeminfo.h>
/*
* Emulation of kernel services in userland.
*/
-int hz = 119; /* frequency when using gethrtime() >> 23 for lbolt */
+int aok;
uint64_t physmem;
vnode_t *rootdir = (vnode_t *)0xabcd1234;
-char hw_serial[11];
+char hw_serial[HW_HOSTID_LEN];
struct utsname utsname = {
"userland", "libzpool", "1", "1", "na"
};
+/* this only exists to have its address taken */
+struct proc p0;
+
/*
* =========================================================================
* threads
@@ -137,7 +140,7 @@ mutex_tryenter(kmutex_t *mp)
{
ASSERT(mp->initialized == B_TRUE);
ASSERT(mp->m_owner != (void *)-1UL);
- if (mutex_trylock(&mp->m_lock) == 0) {
+ if (0 == mutex_trylock(&mp->m_lock)) {
ASSERT(mp->m_owner == NULL);
mp->m_owner = curthread;
return (1);
@@ -150,7 +153,7 @@ void
mutex_exit(kmutex_t *mp)
{
ASSERT(mp->initialized == B_TRUE);
- ASSERT(mp->m_owner == curthread);
+ ASSERT(mutex_owner(mp) == curthread);
mp->m_owner = NULL;
VERIFY(mutex_unlock(&mp->m_lock) == 0);
}
@@ -308,9 +311,9 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
struct timeval tv;
clock_t delta;
- abstime += lbolt;
+ abstime += ddi_get_lbolt();
top:
- delta = abstime - lbolt;
+ delta = abstime - ddi_get_lbolt();
if (delta <= 0)
return (-1);
@@ -432,10 +435,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
vp->v_fd = fd;
- if (S_ISCHR(st.st_mode))
- ioctl(fd, DIOCGMEDIASIZE, &vp->v_size);
- else
- vp->v_size = st.st_size;
+ vp->v_size = st.st_size;
vp->v_path = spa_strdup(path);
return (0);
@@ -497,6 +497,24 @@ vn_close(vnode_t *vp, int openflag, cred_t *cr, kthread_t *td)
umem_free(vp, sizeof (vnode_t));
}
+/*
+ * At a minimum we need to update the size since vdev_reopen()
+ * will no longer call vn_openat().
+ *
+ * Stores the size reported by fstat64() on vp->v_fd in vap->va_size and
+ * returns 0.  If fstat64() fails, the descriptor is closed and the errno
+ * value from fstat64() is returned.
+ */
+int
+fop_getattr(vnode_t *vp, vattr_t *vap)
+{
+	struct stat64 st;
+
+	if (fstat64(vp->v_fd, &st) == -1) {
+		/*
+		 * Capture errno before close(): close() can fail too and
+		 * would overwrite the error we need to report.
+		 */
+		int err = errno;
+
+		/*
+		 * NOTE(review): vp->v_fd is left pointing at a closed
+		 * descriptor here -- confirm that no later path closes
+		 * it a second time.
+		 */
+		(void) close(vp->v_fd);
+		return (err);
+	}
+
+	vap->va_size = st.st_size;
+	return (0);
+}
+
#ifdef ZFS_DEBUG
/*
@@ -811,6 +829,17 @@ ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
return (0);
}
+/*
+ * Parse 'str' as an unsigned long long in the given base using strtoull().
+ *
+ * The parsed value is stored in *result.  If 'nptr' is not NULL, *nptr is
+ * set to the first character that was not consumed.  Returns 0 on success,
+ * or the errno value set by strtoull() on failure.
+ */
+int
+ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
+{
+	char *end;
+
+	/*
+	 * strtoull() only writes errno on failure, so clear it first;
+	 * otherwise a stale value from an earlier call would be reported
+	 * as an error whenever the input legitimately parses to 0.
+	 */
+	errno = 0;
+	*result = strtoull(str, &end, base);
+
+	/* Tell the caller where parsing stopped, if it asked. */
+	if (nptr != NULL)
+		*nptr = end;
+
+	return (errno);
+}
+
/*
* =========================================================================
* kernel emulation setup & teardown
@@ -836,8 +865,8 @@ kernel_init(int mode)
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
- snprintf(hw_serial, sizeof (hw_serial), "%lu",
- (unsigned long)gethostid());
+ (void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
+ (mode & FWRITE) ? (unsigned long)gethostid() : 0);
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
@@ -852,6 +881,8 @@ kernel_fini(void)
{
spa_fini();
+ system_taskq_fini();
+
close(random_fd);
close(urandom_fd);
@@ -942,3 +973,72 @@ ksiddomain_rele(ksiddomain_t *ksid)
spa_strfree(ksid->kd_name);
umem_free(ksid, sizeof (ksiddomain_t));
}
+
+/*
+ * Do not change the length of the returned string; it must be freed
+ * with strfree().
+ *
+ * Formats 'fmt' and its arguments into a kmem_alloc()ed buffer whose size
+ * is determined by a first, measuring vsnprintf() pass.
+ */
+char *
+kmem_asprintf(const char *fmt, ...)
+{
+	va_list args;
+	char *str;
+	int len;
+
+	/* Pass 1: measure the output; +1 leaves room for the NUL. */
+	va_start(args, fmt);
+	len = vsnprintf(NULL, 0, fmt, args);
+	va_end(args);
+	len += 1;
+
+	str = kmem_alloc(len, KM_SLEEP);
+
+	/* Pass 2: format into the buffer. */
+	va_start(args, fmt);
+	(void) vsnprintf(str, len, fmt, args);
+	va_end(args);
+
+	return (str);
+}
+
+/*
+ * Stub: 'fd' is not examined.  Stores 0 in *minorp and returns 0.
+ */
+/* ARGSUSED */
+int
+zfs_onexit_fd_hold(int fd, minor_t *minorp)
+{
+ *minorp = 0;
+ return (0);
+}
+
+/*
+ * Stub: does nothing; 'fd' is ignored.
+ */
+/* ARGSUSED */
+void
+zfs_onexit_fd_rele(int fd)
+{
+}
+
+/*
+ * Stub: 'func' is neither recorded nor called, and *action_handle is not
+ * written.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
+ uint64_t *action_handle)
+{
+ return (0);
+}
+
+/*
+ * Stub: all arguments are ignored, so nothing is removed or fired.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
+{
+ return (0);
+}
+
+/*
+ * Stub: *data is not written; callers must not rely on it being set.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
+{
+ return (0);
+}
+
+#ifdef __FreeBSD__
+/*
+ * Stub compiled only when __FreeBSD__ is defined: 'name' is ignored and
+ * nothing is created.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+zvol_create_minors(const char *name)
+{
+ return (0);
+}
+#endif
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
index 942c836..472cf7b 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -59,6 +58,7 @@ extern "C" {
#include <time.h>
#include <math.h>
#include <umem.h>
+#include <inttypes.h>
#include <fsshare.h>
#include <sys/note.h>
#include <sys/types.h>
@@ -80,7 +80,9 @@ extern "C" {
#include <sys/u8_textprep.h>
#include <sys/kernel.h>
#include <sys/disk.h>
+#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
#include <machine/atomic.h>
#define ZFS_EXPORTS_PATH "/etc/zfs/exports"
@@ -119,20 +121,27 @@ extern void vpanic(const char *, __va_list);
#define fm_panic panic
+extern int aok;
+
/* This definition is copied from assert.h. */
#if defined(__STDC__)
#if __STDC_VERSION__ - 0 >= 199901L
-#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
+#define zverify(EX) (void)((EX) || (aok) || \
+ (__assert(#EX, __FILE__, __LINE__), 0))
#else
-#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
+#define zverify(EX) (void)((EX) || (aok) || \
+ (__assert(#EX, __FILE__, __LINE__), 0))
#endif /* __STDC_VERSION__ - 0 >= 199901L */
#else
-#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
+#define zverify(EX) (void)((EX) || (aok) || \
+ (_assert("EX", __FILE__, __LINE__), 0))
#endif /* __STDC__ */
-#define VERIFY verify
-#define ASSERT assert
+#define VERIFY zverify
+#define ASSERT zverify
+#undef assert
+#define assert zverify
extern void __assert(const char *, const char *, int);
@@ -143,7 +152,7 @@ extern void __assert(const char *, const char *, int);
#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
const TYPE __left = (TYPE)(LEFT); \
const TYPE __right = (TYPE)(RIGHT); \
- if (!(__left OP __right)) { \
+ if (!(__left OP __right) && (!aok)) { \
char *__buf = alloca(256); \
(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
#LEFT, #OP, #RIGHT, \
@@ -209,6 +218,18 @@ typedef struct kthread kthread_t;
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
zk_thread_create(func, arg)
#define thread_exit() thr_exit(NULL)
+#define thread_join(t) panic("libzpool cannot join threads")
+
+#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS)
+
+/* in libzpool, p0 exists only to have its address taken */
+struct proc {
+ uintptr_t this_is_never_used_dont_dereference_it;
+};
+
+extern struct proc p0;
+
+#define PS_NONE -1
extern kthread_t *zk_thread_create(void (*func)(), void *arg);
@@ -225,8 +246,11 @@ typedef struct kmutex {
} kmutex_t;
#define MUTEX_DEFAULT USYNC_THREAD
-#undef MUTEX_HELD
+#undef MUTEX_HELD
+#undef MUTEX_NOT_HELD
#define MUTEX_HELD(m) ((m)->m_owner == curthread)
+#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))
+#define _mutex_held(m) pthread_mutex_isowned_np(m)
/*
* Argh -- we have to get cheesy here because the kernel and userland
@@ -234,6 +258,7 @@ typedef struct kmutex {
*/
//extern int _mutex_init(mutex_t *mp, int type, void *arg);
//extern int _mutex_destroy(mutex_t *mp);
+//extern int _mutex_owned(mutex_t *mp);
#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp))
#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp))
@@ -305,6 +330,7 @@ extern void cv_broadcast(kcondvar_t *cv);
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
#define KMC_NODEBUG UMC_NODEBUG
+#define KMC_NOTOUCH 0 /* not needed for userland caches */
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
@@ -315,10 +341,21 @@ extern void cv_broadcast(kcondvar_t *cv);
#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
#define kmem_debugging() 0
-#define kmem_cache_reap_now(c)
+#define kmem_cache_reap_now(_c) /* nothing */
+#define kmem_cache_set_move(_c, _cb) /* nothing */
+#define POINTER_INVALIDATE(_pp) /* nothing */
+#define POINTER_IS_VALID(_p) 0
typedef umem_cache_t kmem_cache_t;
+typedef enum kmem_cbrc {
+ KMEM_CBRC_YES,
+ KMEM_CBRC_NO,
+ KMEM_CBRC_LATER,
+ KMEM_CBRC_DONT_NEED,
+ KMEM_CBRC_DONT_KNOW
+} kmem_cbrc_t;
+
/*
* Task queues
*/
@@ -329,23 +366,30 @@ typedef void (task_func_t)(void *);
#define TASKQ_PREPOPULATE 0x0001
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
-#define TASKQ_THREADS_CPU_PCT 0x0008 /* Use dynamic thread scheduling */
+#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */
+#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
-#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
+#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
+#define TQ_FRONT 0x08 /* Queue in front */
extern taskq_t *system_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
+#define taskq_create_proc(a, b, c, d, e, p, f) \
+ (taskq_create(a, b, c, d, e, f))
+#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
+ (taskq_create(a, b, maxclsyspri, d, e, f))
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, void *);
extern void system_taskq_init(void);
+extern void system_taskq_fini(void);
-#define taskq_dispatch_safe(tq, func, arg, task) \
- taskq_dispatch((tq), (func), (arg), TQ_SLEEP)
+#define taskq_dispatch_safe(tq, func, arg, flags, task) \
+ taskq_dispatch((tq), (func), (arg), (flags))
#define XVA_MAPSIZE 3
#define XVA_MAGIC 0x78766174
@@ -359,6 +403,7 @@ typedef struct vnode {
char *v_path;
} vnode_t;
+#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
typedef struct xoptattr {
timestruc_t xoa_createtime; /* Create time of file */
@@ -374,6 +419,10 @@ typedef struct xoptattr {
uint8_t xoa_opaque;
uint8_t xoa_av_quarantined;
uint8_t xoa_av_modified;
+ uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ];
+ uint8_t xoa_reparse;
+ uint8_t xoa_offline;
+ uint8_t xoa_sparse;
} xoptattr_t;
typedef struct vattr {
@@ -420,13 +469,15 @@ typedef struct vsecattr {
#define CRCREAT 0
+extern int fop_getattr(vnode_t *vp, vattr_t *vap);
+
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
-#define VOP_GETATTR(vp, vap, cr) ((vap)->va_size = (vp)->v_size, 0)
+/* No trailing ';' in the expansion, so the macro is usable as an expression. */
+#define	VOP_GETATTR(vp, vap, cr)	fop_getattr((vp), (vap))
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
-#define VN_RELE(vp) vn_close(vp, 0, NULL, NULL)
+#define VN_RELE(vp) vn_close(vp, 0, NULL, NULL)
#define VN_RELE_ASYNC(vp, taskq) vn_close(vp, 0, NULL, NULL)
#define vn_lock(vp, type)
@@ -460,13 +511,18 @@ extern vnode_t *rootdir;
/*
* Random stuff
*/
-#define lbolt (gethrtime() >> 23)
-#define lbolt64 (gethrtime() >> 23)
-//#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
+#define ddi_get_lbolt() (gethrtime() >> 23)
+#define ddi_get_lbolt64() (gethrtime() >> 23)
+#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
extern void delay(clock_t ticks);
#define gethrestime_sec() time(NULL)
+/*
+ * Fill *t with the current time at one-second resolution: tv_sec comes
+ * from gethrestime_sec() and tv_nsec is always 0.  The do/while (0)
+ * wrapper is deliberately left unterminated so that the caller's ';'
+ * completes the statement (safe in unbraced if/else).
+ */
+#define gethrestime(t) \
+	do {\
+		(t)->tv_sec = gethrestime_sec();\
+		(t)->tv_nsec = 0;\
+	} while (0)
#define max_ncpus 64
@@ -475,6 +531,9 @@ extern void delay(clock_t ticks);
#define CPU_SEQID (thr_self() & (max_ncpus - 1))
+#define kcred NULL
+#define CRED() NULL
+
#ifndef ptob
#define ptob(x) ((x) * PAGESIZE)
#endif
@@ -516,14 +575,20 @@ typedef struct callb_cpr {
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
+extern char *kmem_asprintf(const char *fmt, ...);
+#define strfree(str) kmem_free((str), strlen(str)+1)
+
/*
* Hostname information
*/
extern struct utsname utsname;
-extern char hw_serial[];
+extern char hw_serial[]; /* for userland-emulated hostid access */
extern int ddi_strtoul(const char *str, char **nptr, int base,
unsigned long *result);
+extern int ddi_strtoull(const char *str, char **nptr, int base,
+ u_longlong_t *result);
+
/* ZFS Boot Related stuff. */
struct _buf {
@@ -563,7 +628,6 @@ extern zoneid_t getzoneid(void);
#define lbolt (gethrtime() >> 23)
#define lbolt64 (gethrtime() >> 23)
-extern int hz;
extern uint64_t physmem;
#define gethrestime_sec() time(NULL)
@@ -593,6 +657,9 @@ void ksiddomain_rele(ksiddomain_t *);
typedef uint32_t idmap_rid_t;
+#define DDI_SLEEP KM_SLEEP
+#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) (0)
+
#define SX_SYSINIT(name, lock, desc)
#define SYSCTL_DECL(...)
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
index 1a73fe8..8db5d11 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -49,6 +49,8 @@ struct taskq {
int tq_nalloc;
int tq_minalloc;
int tq_maxalloc;
+ kcondvar_t tq_maxalloc_cv;
+ int tq_maxalloc_wait;
task_t *tq_freelist;
task_t tq_task;
};
@@ -57,26 +59,36 @@ static task_t *
task_alloc(taskq_t *tq, int tqflags)
{
task_t *t;
+ int rv;
- if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
+again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
tq->tq_freelist = t->task_next;
} else {
- mutex_exit(&tq->tq_lock);
if (tq->tq_nalloc >= tq->tq_maxalloc) {
- if (!(tqflags & KM_SLEEP)) {
- mutex_enter(&tq->tq_lock);
+ if (!(tqflags & KM_SLEEP))
return (NULL);
- }
+
/*
* We don't want to exceed tq_maxalloc, but we can't
* wait for other tasks to complete (and thus free up
* task structures) without risking deadlock with
* the caller. So, we just delay for one second
- * to throttle the allocation rate.
+ * to throttle the allocation rate. If we have tasks
+ * complete before one second timeout expires then
+ * task_free() will signal us and we will
+ * immediately retry the allocation.
*/
- delay(hz);
+ tq->tq_maxalloc_wait++;
+ rv = cv_timedwait(&tq->tq_maxalloc_cv,
+ &tq->tq_lock, ddi_get_lbolt() + hz);
+ tq->tq_maxalloc_wait--;
+ if (rv > 0)
+ goto again; /* signaled */
}
+ mutex_exit(&tq->tq_lock);
+
t = kmem_alloc(sizeof (task_t), tqflags);
+
mutex_enter(&tq->tq_lock);
if (t != NULL)
tq->tq_nalloc++;
@@ -96,6 +108,9 @@ task_free(taskq_t *tq, task_t *t)
kmem_free(t, sizeof (task_t));
mutex_enter(&tq->tq_lock);
}
+
+ if (tq->tq_maxalloc_wait)
+ cv_signal(&tq->tq_maxalloc_cv);
}
taskqid_t
@@ -114,8 +129,13 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
mutex_exit(&tq->tq_lock);
return (0);
}
- t->task_next = &tq->tq_task;
- t->task_prev = tq->tq_task.task_prev;
+ if (tqflags & TQ_FRONT) {
+ t->task_next = tq->tq_task.task_next;
+ t->task_prev = &tq->tq_task;
+ } else {
+ t->task_next = &tq->tq_task;
+ t->task_prev = tq->tq_task.task_prev;
+ }
t->task_next->task_prev = t;
t->task_prev->task_next = t;
t->task_func = func;
@@ -191,6 +211,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
tq->tq_flags = flags | TASKQ_ACTIVE;
tq->tq_active = nthreads;
tq->tq_nthreads = nthreads;
@@ -247,6 +268,7 @@ taskq_destroy(taskq_t *tq)
mutex_destroy(&tq->tq_lock);
cv_destroy(&tq->tq_dispatch_cv);
cv_destroy(&tq->tq_wait_cv);
+ cv_destroy(&tq->tq_maxalloc_cv);
kmem_free(tq, sizeof (taskq_t));
}
@@ -272,3 +294,10 @@ system_taskq_init(void)
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
}
+
+/*
+ * Destroy the global system_taskq with taskq_destroy() and clear the
+ * pointer so that a later use of the stale handle is easier to notice.
+ */
+void
+system_taskq_fini(void)
+{
+ taskq_destroy(system_taskq);
+ system_taskq = NULL; /* defensive */
+}
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/util.c b/cddl/contrib/opensolaris/lib/libzpool/common/util.c
index 781edb6..9b99531 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/util.c
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/util.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <assert.h>
@@ -90,7 +89,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
if (is_log)
prefix = "log ";
- if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+ if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) != 0)
vs = &v0;
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py
index f4b0f53..76b0998 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
"""
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py
index d3a03c7..fa8209f 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/allow.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
"""This module implements the "zfs allow" and "zfs unallow" subcommands.
@@ -204,8 +203,8 @@ def args_to_perms(parser, options, who, perms):
perms_subcmd = dict(
create=_("Must also have the 'mount' ability"),
destroy=_("Must also have the 'mount' ability"),
- snapshot=_("Must also have the 'mount' ability"),
- rollback=_("Must also have the 'mount' ability"),
+ snapshot="",
+ rollback="",
clone=_("""Must also have the 'create' ability and 'mount'
\t\t\t\tability in the origin file system"""),
promote=_("""Must also have the 'mount'
@@ -217,6 +216,9 @@ perms_subcmd = dict(
mount=_("Allows mount/umount of ZFS datasets"),
share=_("Allows sharing file systems over NFS or SMB\n\t\t\t\tprotocols"),
send="",
+ hold=_("Allows adding a user hold to a snapshot"),
+ release=_("Allows releasing a user hold which\n\t\t\t\tmight destroy the snapshot"),
+ diff=_("Allows lookup of paths within a dataset,\n\t\t\t\tgiven an object number. Ordinary users need this\n\t\t\t\tin order to use zfs diff"),
)
perms_other = dict(
@@ -265,7 +267,7 @@ def print_perms():
print(fmt % (name, _("property"), ""))
def do_allow():
- """Implementes the "zfs allow" and "zfs unallow" subcommands."""
+ """Implements the "zfs allow" and "zfs unallow" subcommands."""
un = (sys.argv[1] == "unallow")
def usage(msg=None):
@@ -320,7 +322,7 @@ def do_allow():
if sys.argv[2] == "-h":
# hack to make "zfs allow -h" work
usage()
- ds = zfs.dataset.Dataset(sys.argv[2])
+ ds = zfs.dataset.Dataset(sys.argv[2], snaps=False)
p = dict()
for (fs, raw) in ds.get_fsacl().items():
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py
index b45173e..26192e4 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/dataset.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
"""Implements the Dataset class, providing methods for manipulating ZFS
@@ -109,7 +108,7 @@ class Dataset(object):
types is an iterable of strings specifying which types
of datasets are permitted. Accepted strings are
- "filesystem" and "volume". Defaults to acceptying all
+ "filesystem" and "volume". Defaults to accepting all
types.
snaps is a boolean specifying if snapshots are acceptable.
@@ -203,3 +202,33 @@ class Dataset(object):
Return a dict("whostr": { "perm" -> None })."""
return zfs.ioctl.get_fsacl(self.name)
+
+ def get_holds(self):
+ """Get the user holds on this Dataset.
+
+ Return a dict("tag": timestamp)."""
+
+ return zfs.ioctl.get_holds(self.name)
+
+def snapshots_fromcmdline(dsnames, recursive):
+ """Generate Dataset objects for the snapshot names in dsnames.
+
+ Every name must contain "@"; a name without it raises
+ zfs.util.ZFSError(errno.EINVAL).  Each named snapshot is opened
+ and yielded.  If opening fails, the error is re-raised unless
+ recursive is true and the failure was ENOENT.  When recursive is
+ true, the snapshot with the same name is also yielded for every
+ descendent of the base dataset, silently skipping descendents
+ that do not have it (ENOENT)."""
+ for dsname in dsnames:
+ if not "@" in dsname:
+ raise zfs.util.ZFSError(errno.EINVAL,
+ _("cannot open %s") % dsname,
+ _("operation only applies to snapshots"))
+ try:
+ ds = Dataset(dsname)
+ yield ds
+ except zfs.util.ZFSError, e:
+ # A missing top-level snapshot is tolerated only with -r.
+ if not recursive or e.errno != errno.ENOENT:
+ raise
+ if recursive:
+ # Split "base@snap" and look for "@snap" under every descendent.
+ (base, snapname) = dsname.split('@')
+ parent = Dataset(base)
+ for child in parent.descendents():
+ try:
+ yield Dataset(child.name + "@" +
+ snapname)
+ except zfs.util.ZFSError, e:
+ # Descendents without this snapshot are skipped.
+ if e.errno != errno.ENOENT:
+ raise
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py
index 7db4bf3..9f380fd 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/groupspace.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
import zfs.userspace
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py b/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py
new file mode 100644
index 0000000..800e28f
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/holds.py
@@ -0,0 +1,75 @@
+#! /usr/bin/python2.6
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+"""This module implements the "zfs holds" subcommand.
+The only public interface is the zfs.holds.do_holds() function."""
+
+import optparse
+import sys
+import errno
+import time
+import zfs.util
+import zfs.dataset
+import zfs.table
+
+_ = zfs.util._
+
+def do_holds():
+ """Implements the "zfs holds" subcommand."""
+ # Print the option help, then exit; with msg, exit with an error text.
+ def usage(msg=None):
+ parser.print_help()
+ if msg:
+ # Python 2 bare print statement: emits an empty line.
+ print
+ parser.exit("zfs: error: " + msg)
+ else:
+ parser.exit()
+
+ u = _("""holds [-r] <snapshot> ...""")
+
+ parser = optparse.OptionParser(usage=u, prog="zfs")
+
+ parser.add_option("-r", action="store_true", dest="recursive",
+ help=_("list holds recursively"))
+
+ # sys.argv[1] is the subcommand name, so options start at index 2.
+ (options, args) = parser.parse_args(sys.argv[2:])
+
+ if len(args) < 1:
+ usage(_("missing snapshot argument"))
+
+ fields = ("name", "tag", "timestamp")
+ rjustfields = ()
+ # printing: at least one hold was added to the table.
+ # gotone: at least one snapshot was yielded, even if it has no holds.
+ printing = False
+ gotone = False
+ t = zfs.table.Table(fields, rjustfields)
+ for ds in zfs.dataset.snapshots_fromcmdline(args, options.recursive):
+ gotone = True
+ for tag, tm in ds.get_holds().iteritems():
+ # tm is passed to time.ctime() to get a readable timestamp.
+ val = {"name": ds.name, "tag": tag,
+ "timestamp": time.ctime(tm)}
+ # The snapshot name is used as the table sort key.
+ t.addline(ds.name, val)
+ printing = True
+ if printing:
+ t.printme()
+ elif not gotone:
+ # No snapshot matched at all: report it as ENOENT.
+ raise zfs.util.ZFSError(errno.ENOENT, _("no matching datasets"))
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c
index 4571147..d1f82a7 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/ioctl.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,8 +29,6 @@
#include <strings.h>
#include <unistd.h>
#include <libnvpair.h>
-#include <idmap.h>
-#include <zone.h>
#include <libintl.h>
#include <libzfs.h>
#include <libzfs_impl.h>
@@ -45,10 +43,6 @@ static int zfsdevfd;
#define _(s) dgettext(TEXT_DOMAIN, s)
-#ifdef sun
-extern int sid_to_id(char *sid, boolean_t user, uid_t *id);
-#endif /* sun */
-
/*PRINTFLIKE1*/
static void
seterr(char *fmt, ...)
@@ -66,7 +60,7 @@ seterr(char *fmt, ...)
static char cmdstr[HIS_MAX_RECORD_LEN];
static int
-ioctl_with_cmdstr(unsigned long ioc, zfs_cmd_t *zc)
+ioctl_with_cmdstr(int ioc, zfs_cmd_t *zc)
{
int err;
@@ -138,8 +132,7 @@ dict2nvl(PyObject *d)
nvlist_t *nvl;
int err;
PyObject *key, *value;
-// int pos = 0;
- Py_ssize_t pos = 0;
+ int pos = 0;
if (!PyDict_Check(d)) {
PyErr_SetObject(PyExc_ValueError, d);
@@ -205,7 +198,7 @@ add_ds_props(zfs_cmd_t *zc, PyObject *nvl)
/* On error, returns NULL but does not set python exception. */
static PyObject *
-ioctl_with_dstnv(unsigned long ioc, zfs_cmd_t *zc)
+ioctl_with_dstnv(int ioc, zfs_cmd_t *zc)
{
int nvsz = 2048;
void *nvbuf;
@@ -236,7 +229,7 @@ again:
static PyObject *
py_next_dataset(PyObject *self, PyObject *args)
{
- unsigned long ioc;
+ int ioc;
uint64_t cookie;
zfs_cmd_t zc = { 0 };
int snaps;
@@ -353,6 +346,25 @@ py_set_fsacl(PyObject *self, PyObject *args)
}
+/*
+ * Python entry point exported as "get_holds" in zfsmethods.
+ *
+ * Takes a single string argument (the dataset name), issues
+ * ZFS_IOC_GET_HOLDS for it through ioctl_with_dstnv() and returns the
+ * resulting Python object.  If argument parsing fails, or the ioctl
+ * helper returns NULL, NULL is returned; in the latter case seterr() is
+ * called with a "cannot get holds" message because ioctl_with_dstnv()
+ * does not set a Python exception itself.
+ */
static PyObject *
+py_get_holds(PyObject *self, PyObject *args)
+{
+ zfs_cmd_t zc = { 0 };
+ char *name;
+ PyObject *nvl;
+
+ /* "s": exactly one string argument, the dataset name. */
+ if (!PyArg_ParseTuple(args, "s", &name))
+ return (NULL);
+
+ /* Bounded copy into the fixed-size zc_name field. */
+ (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+ nvl = ioctl_with_dstnv(ZFS_IOC_GET_HOLDS, &zc);
+ if (nvl == NULL)
+ seterr(_("cannot get holds for %s"), name);
+
+ return (nvl);
+}
+
+static PyObject *
py_userspace_many(PyObject *self, PyObject *args)
{
zfs_cmd_t zc = { 0 };
@@ -440,80 +452,6 @@ py_userspace_upgrade(PyObject *self, PyObject *args)
}
static PyObject *
-py_sid_to_id(PyObject *self, PyObject *args)
-{
-#ifdef sun
- char *sid;
- int err, isuser;
- uid_t id;
-
- if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
- return (NULL);
-
- err = sid_to_id(sid, isuser, &id);
- if (err) {
- PyErr_SetString(PyExc_KeyError, sid);
- return (NULL);
- }
-
- return (Py_BuildValue("I", id));
-#else /* sun */
- return (NULL);
-#endif /* sun */
-}
-
-/*
- * Translate the sid string ("S-1-...") to the user@domain name, if
- * possible. There should be a better way to do this, but for now we
- * just translate to the (possibly ephemeral) uid and then back again.
- */
-static PyObject *
-py_sid_to_name(PyObject *self, PyObject *args)
-{
-#ifdef sun
- char *sid;
- int err, isuser;
- uid_t id;
- char *name, *domain;
- char buf[256];
-
- if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
- return (NULL);
-
- err = sid_to_id(sid, isuser, &id);
- if (err) {
- PyErr_SetString(PyExc_KeyError, sid);
- return (NULL);
- }
-
- if (isuser) {
- err = idmap_getwinnamebyuid(id,
- IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
- } else {
- err = idmap_getwinnamebygid(id,
- IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
- }
- if (err != IDMAP_SUCCESS) {
- PyErr_SetString(PyExc_KeyError, sid);
- return (NULL);
- }
- (void) snprintf(buf, sizeof (buf), "%s@%s", name, domain);
- free(name);
- free(domain);
-
- return (Py_BuildValue("s", buf));
-#else /* sun */
- return(NULL);
-#endif /* sun */
-}
-
-static PyObject *
-py_isglobalzone(PyObject *self, PyObject *args)
-{
- return (Py_BuildValue("i", getzoneid() == GLOBAL_ZONEID));
-}
-
-static PyObject *
py_set_cmdstr(PyObject *self, PyObject *args)
{
char *str;
@@ -584,12 +522,7 @@ static PyMethodDef zfsmethods[] = {
"Get dataset properties."},
{"get_proptable", py_get_proptable, METH_NOARGS,
"Get property table."},
- /* Below are not really zfs-specific: */
- {"sid_to_id", py_sid_to_id, METH_VARARGS, "Map SID to UID/GID."},
- {"sid_to_name", py_sid_to_name, METH_VARARGS,
- "Map SID to name@domain."},
- {"isglobalzone", py_isglobalzone, METH_NOARGS,
- "Determine if this is the global zone."},
+ {"get_holds", py_get_holds, METH_VARARGS, "Get user holds."},
{NULL, NULL, 0, NULL}
};
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/table.py b/cddl/contrib/opensolaris/lib/pyzfs/common/table.py
new file mode 100644
index 0000000..d2a45a1
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/table.py
@@ -0,0 +1,70 @@
+#! /usr/bin/python2.6
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+import zfs.util
+
+class Table:
+ __slots__ = "fields", "rjustfields", "maxfieldlen", "lines"
+ __repr__ = zfs.util.default_repr
+
+ def __init__(self, fields, rjustfields=()):
+ # XXX maybe have a defaults, too?
+ self.fields = fields
+ self.rjustfields = rjustfields
+ self.maxfieldlen = dict.fromkeys(fields, 0)
+ self.lines = list()
+
+ def __updatemax(self, k, v):
+ self.maxfieldlen[k] = max(self.maxfieldlen.get(k, None), v)
+
+ def addline(self, sortkey, values):
+ """values is a dict from field name to value"""
+
+ va = list()
+ for f in self.fields:
+ v = str(values[f])
+ va.append(v)
+ self.__updatemax(f, len(v))
+ self.lines.append((sortkey, va))
+
+ def printme(self, headers=True):
+ if headers:
+ d = dict([(f, f.upper()) for f in self.fields])
+ self.addline(None, d)
+
+ self.lines.sort()
+ for (k, va) in self.lines:
+ line = str()
+ for i in range(len(self.fields)):
+ if not headers:
+ line += va[i]
+ line += "\t"
+ else:
+ if self.fields[i] in self.rjustfields:
+ fmt = "%*s "
+ else:
+ fmt = "%-*s "
+ mfl = self.maxfieldlen[self.fields[i]]
+ line += fmt % (mfl, va[i])
+ print(line)
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py
index 1458dc1..cbdd4dd 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/unallow.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
import zfs.allow
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py
index c269d51..33646bc 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/userspace.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,21 +19,22 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
"""This module implements the "zfs userspace" and "zfs groupspace" subcommands.
The only public interface is the zfs.userspace.do_userspace() function."""
-import zfs.util
-import zfs.ioctl
-import zfs.dataset
import optparse
import sys
import pwd
import grp
import errno
+import solaris.misc
+import zfs.util
+import zfs.ioctl
+import zfs.dataset
+import zfs.table
_ = zfs.util._
@@ -58,9 +59,6 @@ def skiptype(options, prop):
return True
return False
-def updatemax(d, k, v):
- d[k] = max(d.get(k, None), v)
-
def new_entry(options, isgroup, domain, rid):
"""Return a dict("field": value) for this domain (string) + rid (int)"""
@@ -70,9 +68,9 @@ def new_entry(options, isgroup, domain, rid):
idstr = "%u" % rid
(typename, mapfunc) = {
- (1, 1): ("SMB Group", lambda id: zfs.ioctl.sid_to_name(id, 0)),
+ (1, 1): ("SMB Group", lambda id: solaris.misc.sid_to_name(id, 0)),
(1, 0): ("POSIX Group", lambda id: grp.getgrgid(int(id)).gr_name),
- (0, 1): ("SMB User", lambda id: zfs.ioctl.sid_to_name(id, 1)),
+ (0, 1): ("SMB User", lambda id: solaris.misc.sid_to_name(id, 1)),
(0, 0): ("POSIX User", lambda id: pwd.getpwuid(int(id)).pw_name)
}[isgroup, bool(domain)]
@@ -102,8 +100,8 @@ def new_entry(options, isgroup, domain, rid):
v["quota.sort"] = 0
return v
-def process_one_raw(acct, maxfieldlen, options, prop, elem):
- """Update the acct and maxfieldlen dicts to incorporate the
+def process_one_raw(acct, options, prop, elem):
+ """Update the acct dict to incorporate the
information from this elem from Dataset.userspace(prop)."""
(domain, rid, value) = elem
@@ -111,7 +109,7 @@ def process_one_raw(acct, maxfieldlen, options, prop, elem):
if options.translate and domain:
try:
- rid = zfs.ioctl.sid_to_id("%s-%u" % (domain, rid),
+ rid = solaris.misc.sid_to_id("%s-%u" % (domain, rid),
not isgroup)
domain = None
except KeyError:
@@ -134,10 +132,6 @@ def process_one_raw(acct, maxfieldlen, options, prop, elem):
v[field] = str(value)
else:
v[field] = zfs.util.nicenum(value)
- for k in v.keys():
- # some of the .sort fields are integers, so have no len()
- if isinstance(v[k], str):
- updatemax(maxfieldlen, k, len(v[k]))
def do_userspace():
"""Implements the "zfs userspace" and "zfs groupspace" subcommands."""
@@ -156,7 +150,7 @@ def do_userspace():
defaulttypes = "posixgroup,smbgroup"
fields = ("type", "name", "used", "quota")
- ljustfields = ("type", "name")
+ rjustfields = ("used", "quota")
types = ("all", "posixuser", "smbuser", "posixgroup", "smbgroup")
u = _("%s [-niHp] [-o field[,...]] [-sS field] ... \n") % sys.argv[1]
@@ -209,38 +203,23 @@ def do_userspace():
ds = zfs.dataset.Dataset(dsname, types=("filesystem"))
- if ds.getprop("jailed") and zfs.ioctl.isglobalzone():
+ if ds.getprop("jailed") and solaris.misc.isglobalzone():
options.noname = True
if not ds.getprop("useraccounting"):
print(_("Initializing accounting information on old filesystem, please wait..."))
ds.userspace_upgrade()
- acct = dict()
- maxfieldlen = dict()
-
# gather and process accounting information
+ # Due to -i, we need to keep a dict, so we can potentially add
+ # together the posix ID and SID's usage. Grr.
+ acct = dict()
for prop in props.keys():
if skiptype(options, prop):
continue;
for elem in ds.userspace(prop):
- process_one_raw(acct, maxfieldlen, options, prop, elem)
-
- # print out headers
- if not options.noheaders:
- line = str()
- for field in options.fields:
- # make sure the field header will fit
- updatemax(maxfieldlen, field, len(field))
-
- if field in ljustfields:
- fmt = "%-*s "
- else:
- fmt = "%*s "
- line += fmt % (maxfieldlen[field], field.upper())
- print(line)
-
- # custom sorting func
+ process_one_raw(acct, options, prop, elem)
+
def cmpkey(val):
l = list()
for (opt, field) in options.sortfields:
@@ -261,17 +240,7 @@ def do_userspace():
l.append(n)
return l
- # print out data lines
- for val in sorted(acct.itervalues(), key=cmpkey):
- line = str()
- for field in options.fields:
- if options.noheaders:
- line += val[field]
- line += "\t"
- else:
- if field in ljustfields:
- fmt = "%-*s "
- else:
- fmt = "%*s "
- line += fmt % (maxfieldlen[field], val[field])
- print(line)
+ t = zfs.table.Table(options.fields, rjustfields)
+ for val in acct.itervalues():
+ t.addline(cmpkey(val), val)
+ t.printme(not options.noheaders)
diff --git a/cddl/contrib/opensolaris/lib/pyzfs/common/util.py b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py
index 14d05a8..a33c669 100644
--- a/cddl/contrib/opensolaris/lib/pyzfs/common/util.py
+++ b/cddl/contrib/opensolaris/lib/pyzfs/common/util.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python2.4
+#! /usr/bin/python2.6
#
# CDDL HEADER START
#
@@ -19,8 +19,7 @@
#
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#
"""This module provides utility functions for ZFS.
@@ -29,6 +28,7 @@ zfs.util.dev -- a file object of /dev/zfs """
import gettext
import errno
import os
+import solaris.misc
# Note: this module (zfs.util) should not import zfs.ioctl, because that
# would introduce a circular dependency
@@ -37,8 +37,11 @@ errno.ENOTSUP = 48
dev = open("/dev/zfs", "w")
-_ = gettext.translation("SUNW_OST_OSLIB", "/usr/lib/locale",
- fallback=True).gettext
+try:
+ _ = gettext.translation("SUNW_OST_OSLIB", "/usr/lib/locale",
+ fallback=True).gettext
+except:
+ _ = solaris.misc.gettext
def default_repr(self):
"""A simple __repr__ function."""
diff --git a/cddl/lib/libzfs/Makefile b/cddl/lib/libzfs/Makefile
index 3023a1de..2235a2e 100644
--- a/cddl/lib/libzfs/Makefile
+++ b/cddl/lib/libzfs/Makefile
@@ -6,8 +6,8 @@
.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
LIB= zfs
-DPADD= ${LIBUTIL}
-LDADD= -lutil
+DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL}
+LDADD= -lmd -lpthread -lumem -lutil
SRCS= deviceid.c \
fsshare.c \
@@ -16,23 +16,28 @@ SRCS= deviceid.c \
zmount.c \
zone.c
-SRCS+= zfs_deleg.c \
- zfs_namecheck.c \
- zfs_prop.c \
- zpool_prop.c \
- zprop_common.c \
+SRCS+= libzfs_changelist.c \
+ libzfs_config.c \
libzfs_dataset.c \
- libzfs_util.c \
+ libzfs_diff.c \
libzfs_graph.c \
+ libzfs_import.c \
libzfs_mount.c \
libzfs_pool.c \
- libzfs_changelist.c \
- libzfs_config.c \
- libzfs_import.c \
libzfs_sendrecv.c \
- libzfs_status.c
+ libzfs_status.c \
+ libzfs_util.c \
+ zfs_comutil.c \
+ zfs_deleg.c \
+ zfs_fletcher.c \
+ zfs_ioctl_compat.c \
+ zfs_namecheck.c \
+ zfs_prop.c \
+ zpool_prop.c \
+ zprop_common.c \
WARNS?= 0
+CSTD= c99
CFLAGS+= -DZFS_NO_ACL
CFLAGS+= -I${.CURDIR}/../../../sbin/mount
CFLAGS+= -I${.CURDIR}/../../../cddl/lib/libumem
diff --git a/cddl/lib/libzpool/Makefile b/cddl/lib/libzpool/Makefile
index 7e2841a..0ff8c0d 100644
--- a/cddl/lib/libzpool/Makefile
+++ b/cddl/lib/libzpool/Makefile
@@ -11,7 +11,7 @@
# LIST_SRCS
.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/os
# ATOMIC_SRCS
-.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "ia64" || ${MACHINE_ARCH} == "sparc64" || ${MACHINE_ARCH} == "powerpc64"
+.if exists(${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}/opensolaris_atomic.S)
.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}
ATOMIC_SRCS= opensolaris_atomic.S
.if ${MACHINE_ARCH} != "ia64" && ${MACHINE_ARCH} != "sparc64"
@@ -38,16 +38,16 @@ SRCS= ${ZFS_COMMON_SRCS} ${ZFS_SHARED_SRCS} \
WARNS?= 0
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
-CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
-CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
-CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
-CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
-CFLAGS+= -I${.CURDIR}/../../../cddl/lib/libumem
-CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
+CFLAGS+= -I${.CURDIR}/../../lib/libumem
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
# XXX: pthread doesn't have mutex_owned() equivalent, so we need to look
# into libthr private structures. That's sooo evil, but it's only for
# ZFS debugging tools needs.
@@ -56,8 +56,8 @@ CFLAGS+= -I${.CURDIR}/../../../lib/libpthread/thread
CFLAGS+= -I${.CURDIR}/../../../lib/libpthread/sys
CFLAGS+= -I${.CURDIR}/../../../lib/libthr/arch/${MACHINE_CPUARCH}/include
-DPADD= ${LIBPTHREAD} ${LIBZ}
-LDADD= -lpthread -lz
+DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBZ}
+LDADD= -lmd -lpthread -lz
# atomic.S doesn't like profiling.
NO_PROFILE=
diff --git a/cddl/sbin/zfs/Makefile b/cddl/sbin/zfs/Makefile
index 591ef06..11f6a0f 100644
--- a/cddl/sbin/zfs/Makefile
+++ b/cddl/sbin/zfs/Makefile
@@ -19,10 +19,10 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
-DPADD= ${LIBZFS} ${LIBGEOM} ${LIBBSDXML} ${LIBSBUF} \
- ${LIBM} ${LIBNVPAIR} ${LIBUUTIL} ${LIBUTIL}
-LDADD= -lzfs -lgeom -lbsdxml -lsbuf \
- -lm -lnvpair -luutil -lutil
+DPADD= ${LIBBSDXML} ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBSBUF} ${LIBUMEM} \
+ ${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
+LDADD= -lbsdxml -lgeom -lm -lnvpair -lsbuf -lumem -lutil -luutil -lzfs
.include <bsd.prog.mk>
diff --git a/cddl/sbin/zpool/Makefile b/cddl/sbin/zpool/Makefile
index 06fd238..f810ee1 100644
--- a/cddl/sbin/zpool/Makefile
+++ b/cddl/sbin/zpool/Makefile
@@ -1,11 +1,13 @@
# $FreeBSD$
-.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/zpool \
- ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
+.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/zpool
+.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/stat/common
+.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
PROG= zpool
MAN= zpool.8
SRCS= zpool_main.c zpool_vdev.c zpool_iter.c zpool_util.c zfs_comutil.c
+SRCS+= timestamp.c
WARNS?= 0
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
@@ -21,10 +23,11 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/stat/common
-DPADD= ${LIBAVL} ${LIBZFS} ${LIBGEOM} ${LIBBSDXML} ${LIBSBUF} \
- ${LIBM} ${LIBNVPAIR} ${LIBUUTIL} ${LIBUTIL}
-LDADD= -lavl -lzfs -lgeom -lbsdxml -lsbuf \
- -lm -lnvpair -luutil -lutil
+DPADD= ${LIBAVL} ${LIBBSDXML} ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBSBUF} \
+ ${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
+LDADD= -lavl -lbsdxml -lgeom -lm -lnvpair -lsbuf -lumem -lutil -luutil -lzfs
.include <bsd.prog.mk>
diff --git a/cddl/usr.bin/Makefile b/cddl/usr.bin/Makefile
index c6b1341..13d3a86 100644
--- a/cddl/usr.bin/Makefile
+++ b/cddl/usr.bin/Makefile
@@ -8,12 +8,16 @@ SUBDIR= \
ctfmerge \
sgsmsg \
${_zinject} \
+ ${_zlook} \
+ ${_zstreamdump} \
${_ztest}
.if ${MK_ZFS} != "no"
_zinject= zinject
+#_zlook= zlook
.if ${MK_LIBTHR} != "no"
_ztest= ztest
+_zstreamdump = zstreamdump
.endif
.endif
diff --git a/cddl/usr.bin/zlook/Makefile b/cddl/usr.bin/zlook/Makefile
new file mode 100644
index 0000000..0251f57
--- /dev/null
+++ b/cddl/usr.bin/zlook/Makefile
@@ -0,0 +1,25 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../contrib/opensolaris/cmd/zlook
+
+PROG= zlook
+NO_MAN=
+
+WARNS?= 0
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
+#CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
+#CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
+#CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzfs/common
+#CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
+#CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
+#CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+#CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
+#CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
+#CFLAGS+= -I${.CURDIR}/../../lib/libumem
+#
+#DPADD= ${LIBAVL} ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBUUTIL} \
+# ${LIBZFS} ${LIBZPOOL}
+#LDADD= -lavl -lgeom -lm -lnvpair -lumem -luutil -lzfs -lzpool
+
+.include <bsd.prog.mk>
diff --git a/cddl/usr.bin/zstreamdump/Makefile b/cddl/usr.bin/zstreamdump/Makefile
new file mode 100644
index 0000000..304ff7c
--- /dev/null
+++ b/cddl/usr.bin/zstreamdump/Makefile
@@ -0,0 +1,27 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../..//contrib/opensolaris/cmd/zstreamdump
+
+PROG= zstreamdump
+MAN= zstreamdump.1
+
+WARNS?= 0
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
+CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
+CFLAGS+= -I${.CURDIR}/../../lib/libumem
+
+DPADD= ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
+ ${LIBPTHREAD} ${LIBZ} ${LIBAVL}
+LDADD= -lm -lnvpair -lumem -lzpool -lpthread -lz -lavl
+
+CSTD= c99
+
+.include <bsd.prog.mk>
diff --git a/cddl/usr.bin/ztest/Makefile b/cddl/usr.bin/ztest/Makefile
index 8bb69b1..979880c 100644
--- a/cddl/usr.bin/ztest/Makefile
+++ b/cddl/usr.bin/ztest/Makefile
@@ -10,6 +10,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
+CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
diff --git a/cddl/usr.sbin/zdb/Makefile b/cddl/usr.sbin/zdb/Makefile
index b98038e..446d1c4 100644
--- a/cddl/usr.sbin/zdb/Makefile
+++ b/cddl/usr.sbin/zdb/Makefile
@@ -19,6 +19,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
OpenPOWER on IntegriCloud