Diffstat (limited to 'cddl/contrib/opensolaris/cmd')
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.8           |    6
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb.c           |  829
-rw-r--r--  cddl/contrib/opensolaris/cmd/zdb/zdb_il.c        |   39
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs.8           | 1309
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c      |   71
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h      |   12
-rw-r--r--  cddl/contrib/opensolaris/cmd/zfs/zfs_main.c      | 1775
-rw-r--r--  cddl/contrib/opensolaris/cmd/zinject/translate.c |  460
-rw-r--r--  cddl/contrib/opensolaris/cmd/zinject/zinject.c   |  770
-rw-r--r--  cddl/contrib/opensolaris/cmd/zinject/zinject.h   |   71
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool.8       |  868
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c  |   22
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_main.c  | 1338
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_util.c  |   27
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_util.h  |   14
-rw-r--r--  cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c  |  408
-rw-r--r--  cddl/contrib/opensolaris/cmd/ztest/ztest.c       | 1640
17 files changed, 7258 insertions, 2401 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.8 b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
index 87913f6..c9d5aed 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.8
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.8
@@ -28,13 +28,17 @@ zdb \- ZFS debugger
.fi
.SH DESCRIPTION
+.sp
.LP
The \fBzdb\fR command is used by support engineers to diagnose failures and gather statistics. Since the \fBZFS\fR file system is always consistent on disk and is self-repairing, \fBzdb\fR should only be run under the direction of a support engineer.
+.sp
.LP
If no arguments are specified, \fBzdb\fR performs basic consistency checks on the pool and associated datasets, and reports any problems detected.
+.sp
.LP
Any options supported by this command are internal to Sun and subject to change at any time.
.SH EXIT STATUS
+.sp
.LP
The following exit values are returned:
.sp
@@ -71,6 +75,7 @@ Invalid command line options were specified.
.RE
.SH ATTRIBUTES
+.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -89,5 +94,6 @@ Interface StabilityUnstable
.TE
.SH SEE ALSO
+.sp
.LP
\fBzfs\fR(1M), \fBzpool\fR(1M), \fBattributes\fR(5)
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
index 2dc459d..16b2787 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@@ -51,6 +49,10 @@
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
+#include <sys/zfs_fuid.h>
+#undef ZFS_MAXNAMELEN
+#undef verify
+#include <libzfs.h>
const char cmdname[] = "zdb";
uint8_t dump_opt[256];
@@ -62,6 +64,9 @@ uint64_t *zopt_object = NULL;
int zopt_objects = 0;
int zdb_advance = ADVANCE_PRE;
zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
+libzfs_handle_t *g_zfs;
+boolean_t zdb_sig_user_data = B_TRUE;
+int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -83,12 +88,15 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] "
+ "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] "
+ "[-B os:obj:level:blkid] [-S user:cksumalg] "
"dataset [object...]\n"
" %s -C [pool]\n"
" %s -l dev\n"
- " %s -R vdev:offset:size:flags\n",
- cmdname, cmdname, cmdname, cmdname);
+ " %s -R pool:vdev:offset:size:flags\n"
+ " %s [-p path_to_vdev_dir]\n"
+ " %s -e pool | GUID | devid ...\n",
+ cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
(void) fprintf(stderr, " -u uberblock\n");
(void) fprintf(stderr, " -d datasets\n");
@@ -97,16 +105,22 @@ usage(void)
(void) fprintf(stderr, " -b block statistics\n");
(void) fprintf(stderr, " -c checksum all data blocks\n");
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
+ (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
+ "dump blkptr signatures\n");
(void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
(void) fprintf(stderr, " -L live pool (allows some errors)\n");
(void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> "
"visitation order\n");
- (void) fprintf(stderr, " -U use zpool.cache in /tmp\n");
+ (void) fprintf(stderr, " -U cachefile_path -- use alternate "
+ "cachefile\n");
(void) fprintf(stderr, " -B objset:object:level:blkid -- "
"simulate bad block\n");
- (void) fprintf(stderr, " -R read and display block from a"
+ (void) fprintf(stderr, " -R read and display block from a "
"device\n");
+ (void) fprintf(stderr, " -e Pool is exported/destroyed/"
+ "has altroot\n");
+ (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -367,6 +381,44 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
zap_cursor_fini(&zc);
}
+/*ARGSUSED*/
+static void
+dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ zap_cursor_t zc;
+ zap_attribute_t attr;
+ const char *typenames[] = {
+ /* 0 */ "not specified",
+ /* 1 */ "FIFO",
+ /* 2 */ "Character Device",
+ /* 3 */ "3 (invalid)",
+ /* 4 */ "Directory",
+ /* 5 */ "5 (invalid)",
+ /* 6 */ "Block Device",
+ /* 7 */ "7 (invalid)",
+ /* 8 */ "Regular File",
+ /* 9 */ "9 (invalid)",
+ /* 10 */ "Symbolic Link",
+ /* 11 */ "11 (invalid)",
+ /* 12 */ "Socket",
+ /* 13 */ "Door",
+ /* 14 */ "Event Port",
+ /* 15 */ "15 (invalid)",
+ };
+
+ dump_zap_stats(os, object);
+ (void) printf("\n");
+
+ for (zap_cursor_init(&zc, os, object);
+ zap_cursor_retrieve(&zc, &attr) == 0;
+ zap_cursor_advance(&zc)) {
+ (void) printf("\t\t%s = %lld (type: %s)\n",
+ attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
+ typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
+ }
+ zap_cursor_fini(&zc);
+}
+
static void
dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
{
@@ -456,10 +508,7 @@ dump_metaslabs(spa_t *spa)
for (c = 0; c < rvd->vdev_children; c++) {
vd = rvd->vdev_child[c];
- spa_config_enter(spa, RW_READER, FTAG);
- (void) printf("\n vdev %llu = %s\n\n",
- (u_longlong_t)vd->vdev_id, vdev_description(vd));
- spa_config_exit(spa, FTAG);
+ (void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id);
if (dump_opt['d'] <= 5) {
(void) printf("\t%10s %10s %5s\n",
@@ -477,7 +526,6 @@ static void
dump_dtl(vdev_t *vd, int indent)
{
avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
- spa_t *spa = vd->vdev_spa;
space_seg_t *ss;
vdev_t *pvd;
int c;
@@ -485,9 +533,10 @@ dump_dtl(vdev_t *vd, int indent)
if (indent == 0)
(void) printf("\nDirty time logs:\n\n");
- spa_config_enter(spa, RW_READER, FTAG);
- (void) printf("\t%*s%s\n", indent, "", vdev_description(vd));
- spa_config_exit(spa, FTAG);
+ (void) printf("\t%*s%s\n", indent, "",
+ vd->vdev_path ? vd->vdev_path :
+ vd->vdev_parent ? vd->vdev_ops->vdev_op_type :
+ spa_name(vd->vdev_spa));
for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
/*
@@ -670,36 +719,49 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dir_phys_t *dd = data;
time_t crtime;
- char used[6], compressed[6], uncompressed[6], quota[6], resv[6];
+ char nice[6];
if (dd == NULL)
return;
- ASSERT(size == sizeof (*dd));
+ ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
crtime = dd->dd_creation_time;
- nicenum(dd->dd_used_bytes, used);
- nicenum(dd->dd_compressed_bytes, compressed);
- nicenum(dd->dd_uncompressed_bytes, uncompressed);
- nicenum(dd->dd_quota, quota);
- nicenum(dd->dd_reserved, resv);
-
(void) printf("\t\tcreation_time = %s", ctime(&crtime));
(void) printf("\t\thead_dataset_obj = %llu\n",
(u_longlong_t)dd->dd_head_dataset_obj);
(void) printf("\t\tparent_dir_obj = %llu\n",
(u_longlong_t)dd->dd_parent_obj);
- (void) printf("\t\tclone_parent_obj = %llu\n",
- (u_longlong_t)dd->dd_clone_parent_obj);
+ (void) printf("\t\torigin_obj = %llu\n",
+ (u_longlong_t)dd->dd_origin_obj);
(void) printf("\t\tchild_dir_zapobj = %llu\n",
(u_longlong_t)dd->dd_child_dir_zapobj);
- (void) printf("\t\tused_bytes = %s\n", used);
- (void) printf("\t\tcompressed_bytes = %s\n", compressed);
- (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
- (void) printf("\t\tquota = %s\n", quota);
- (void) printf("\t\treserved = %s\n", resv);
+ nicenum(dd->dd_used_bytes, nice);
+ (void) printf("\t\tused_bytes = %s\n", nice);
+ nicenum(dd->dd_compressed_bytes, nice);
+ (void) printf("\t\tcompressed_bytes = %s\n", nice);
+ nicenum(dd->dd_uncompressed_bytes, nice);
+ (void) printf("\t\tuncompressed_bytes = %s\n", nice);
+ nicenum(dd->dd_quota, nice);
+ (void) printf("\t\tquota = %s\n", nice);
+ nicenum(dd->dd_reserved, nice);
+ (void) printf("\t\treserved = %s\n", nice);
(void) printf("\t\tprops_zapobj = %llu\n",
(u_longlong_t)dd->dd_props_zapobj);
+ (void) printf("\t\tdeleg_zapobj = %llu\n",
+ (u_longlong_t)dd->dd_deleg_zapobj);
+ (void) printf("\t\tflags = %llx\n",
+ (u_longlong_t)dd->dd_flags);
+
+#define DO(which) \
+ nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+ (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
+ DO(HEAD);
+ DO(SNAP);
+ DO(CHILD);
+ DO(CHILD_RSRV);
+ DO(REFRSRV);
+#undef DO
}
/*ARGSUSED*/
@@ -722,7 +784,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
nicenum(ds->ds_unique_bytes, unique);
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
- (void) printf("\t\tdataset_obj = %llu\n",
+ (void) printf("\t\tdir_obj = %llu\n",
(u_longlong_t)ds->ds_dir_obj);
(void) printf("\t\tprev_snap_obj = %llu\n",
(u_longlong_t)ds->ds_prev_snap_obj);
@@ -749,6 +811,10 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)ds->ds_guid);
(void) printf("\t\tflags = %llx\n",
(u_longlong_t)ds->ds_flags);
+ (void) printf("\t\tnext_clones_obj = %llu\n",
+ (u_longlong_t)ds->ds_next_clones_obj);
+ (void) printf("\t\tprops_obj = %llu\n",
+ (u_longlong_t)ds->ds_props_obj);
(void) printf("\t\tbp = %s\n", blkbuf);
}
@@ -765,9 +831,11 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
if (dump_opt['d'] < 3)
return;
+ mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
VERIFY(0 == bplist_open(&bpl, mos, object));
if (bplist_empty(&bpl)) {
bplist_close(&bpl);
+ mutex_destroy(&bpl.bpl_lock);
return;
}
@@ -785,6 +853,7 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
if (dump_opt['d'] < 5) {
bplist_close(&bpl);
+ mutex_destroy(&bpl.bpl_lock);
return;
}
@@ -800,6 +869,65 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
}
bplist_close(&bpl);
+ mutex_destroy(&bpl.bpl_lock);
+}
+
+static avl_tree_t idx_tree;
+static avl_tree_t domain_tree;
+static boolean_t fuid_table_loaded;
+
+static void
+fuid_table_destroy()
+{
+ if (fuid_table_loaded) {
+ zfs_fuid_table_destroy(&idx_tree, &domain_tree);
+ fuid_table_loaded = B_FALSE;
+ }
+}
+
+/*
+ * print uid or gid information.
+ * For normal POSIX id just the id is printed in decimal format.
+ * For CIFS files with FUID the fuid is printed in hex followed by
+ * the domain-rid string.
+ */
+static void
+print_idstr(uint64_t id, const char *id_type)
+{
+ if (FUID_INDEX(id)) {
+ char *domain;
+
+ domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
+ (void) printf("\t%s %llx [%s-%d]\n", id_type,
+ (u_longlong_t)id, domain, (int)FUID_RID(id));
+ } else {
+ (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
+ }
+
+}
+
+static void
+dump_uidgid(objset_t *os, znode_phys_t *zp)
+{
+ uint32_t uid_idx, gid_idx;
+
+ uid_idx = FUID_INDEX(zp->zp_uid);
+ gid_idx = FUID_INDEX(zp->zp_gid);
+
+ /* Load domain table, if not already loaded */
+ if (!fuid_table_loaded && (uid_idx || gid_idx)) {
+ uint64_t fuid_obj;
+
+ /* first find the fuid object. It lives in the master node */
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
+ 8, 1, &fuid_obj) == 0);
+ (void) zfs_fuid_table_load(os, fuid_obj,
+ &idx_tree, &domain_tree);
+ fuid_table_loaded = B_TRUE;
+ }
+
+ print_idstr(zp->zp_uid, "uid");
+ print_idstr(zp->zp_gid, "gid");
}
/*ARGSUSED*/
@@ -830,6 +958,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
z_ctime = (time_t)zp->zp_ctime[0];
(void) printf("\tpath %s\n", path);
+ dump_uidgid(os, zp);
(void) printf("\tatime %s", ctime(&z_atime));
(void) printf("\tmtime %s", ctime(&z_mtime));
(void) printf("\tctime %s", ctime(&z_ctime));
@@ -874,9 +1003,9 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_zap, /* DSL props */
dump_dsl_dataset, /* DSL dataset */
dump_znode, /* ZFS znode */
- dump_acl, /* ZFS ACL */
+ dump_acl, /* ZFS V0 ACL */
dump_uint8, /* ZFS plain file */
- dump_zap, /* ZFS directory */
+ dump_zpldir, /* ZFS directory */
dump_zap, /* ZFS master node */
dump_zap, /* ZFS delete queue */
dump_uint8, /* zvol object */
@@ -888,6 +1017,13 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_uint8, /* SPA history */
dump_uint64, /* SPA history offsets */
dump_zap, /* Pool properties */
+ dump_zap, /* DSL permissions */
+ dump_acl, /* ZFS ACL */
+ dump_uint8, /* ZFS SYSACL */
+ dump_none, /* FUID nvlist */
+ dump_packed_nvlist, /* FUID nvlist size */
+ dump_zap, /* DSL dataset next clones */
+ dump_zap, /* DSL scrub queue */
};
static void
@@ -930,13 +1066,15 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
aux[0] = '\0';
- if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6)
+ if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
- zio_checksum_table[doi.doi_checksum].ci_name);
+ zio_checksum_table[doi.doi_checksum].ci_name);
+ }
- if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6)
+ if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
- zio_compress_table[doi.doi_compress].ci_name);
+ zio_compress_table[doi.doi_compress].ci_name);
+ }
(void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
(u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
@@ -972,13 +1110,13 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
for (;;) {
- error = dnode_next_offset(dn, B_FALSE, &start, minlvl,
- blkfill, 0);
+ error = dnode_next_offset(dn,
+ 0, &start, minlvl, blkfill, 0);
if (error)
break;
end = start;
- error = dnode_next_offset(dn, B_TRUE, &end, minlvl,
- blkfill, 0);
+ error = dnode_next_offset(dn,
+ DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
nicenum(end - start, segsize);
(void) printf("\t\tsegment [%016llx, %016llx)"
" size %5s\n", (u_longlong_t)start,
@@ -996,7 +1134,6 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
static char *objset_types[DMU_OST_NUMTYPES] = {
"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
-/*ARGSUSED*/
static void
dump_dir(objset_t *os)
{
@@ -1019,8 +1156,8 @@ dump_dir(objset_t *os)
if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
usedobjs = os->os->os_rootbp->blk_fill;
- refdbytes =
- os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes;
+ refdbytes = os->os->os_spa->spa_dsl_pool->
+ dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}
@@ -1054,6 +1191,9 @@ dump_dir(objset_t *os)
if (verbosity < 2)
return;
+ if (os->os->os_rootbp->blk_birth == 0)
+ return;
+
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
@@ -1115,6 +1255,52 @@ dump_config(const char *pool)
}
static void
+dump_cachefile(const char *cachefile)
+{
+ int fd;
+ struct stat64 statbuf;
+ char *buf;
+ nvlist_t *config;
+
+ if ((fd = open64(cachefile, O_RDONLY)) < 0) {
+ (void) printf("cannot open '%s': %s\n", cachefile,
+ strerror(errno));
+ exit(1);
+ }
+
+ if (fstat64(fd, &statbuf) != 0) {
+ (void) printf("failed to stat '%s': %s\n", cachefile,
+ strerror(errno));
+ exit(1);
+ }
+
+ if ((buf = malloc(statbuf.st_size)) == NULL) {
+ (void) fprintf(stderr, "failed to allocate %llu bytes\n",
+ (u_longlong_t)statbuf.st_size);
+ exit(1);
+ }
+
+ if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
+ (void) fprintf(stderr, "failed to read %llu bytes\n",
+ (u_longlong_t)statbuf.st_size);
+ exit(1);
+ }
+
+ (void) close(fd);
+
+ if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
+ (void) fprintf(stderr, "failed to unpack nvlist\n");
+ exit(1);
+ }
+
+ free(buf);
+
+ dump_nvlist(config, 0);
+
+ nvlist_free(config);
+}
+
+static void
dump_label(const char *dev)
{
int fd;
@@ -1136,15 +1322,7 @@ dump_label(const char *dev)
exit(1);
}
- if (S_ISCHR(statbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &psize) != 0) {
- (void) printf("failed to get size '%s': %s\n", dev,
- strerror(errno));
- exit(1);
- }
- } else
- psize = statbuf.st_size;
-
+ psize = statbuf.st_size;
psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
for (l = 0; l < VDEV_LABELS; l++) {
@@ -1178,170 +1356,84 @@ dump_one_dir(char *dsname, void *arg)
objset_t *os;
error = dmu_objset_open(dsname, DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
+ DS_MODE_USER | DS_MODE_READONLY, &os);
if (error) {
(void) printf("Could not open %s\n", dsname);
return (0);
}
dump_dir(os);
dmu_objset_close(os);
+ fuid_table_destroy();
return (0);
}
static void
-zdb_space_map_load(spa_t *spa)
+zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m, error;
+ vdev_t *vd = sm->sm_ppd;
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
- for (m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- error = space_map_load(&msp->ms_allocmap[0], NULL,
- SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset);
- mutex_exit(&msp->ms_lock);
- if (error)
- fatal("%s bad space map #%d, error %d",
- spa->spa_name, c, error);
- }
- }
+ (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
+ (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
}
-static int
-zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb)
+/* ARGSUSED */
+static void
+zdb_space_map_load(space_map_t *sm)
{
- dva_t *dva = bp->blk_dva;
- vdev_t *vd;
- metaslab_t *msp;
- space_map_t *allocmap, *freemap;
- int error;
- int d;
- blkptr_t blk = *bp;
-
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- uint64_t vdev = DVA_GET_VDEV(&dva[d]);
- uint64_t offset = DVA_GET_OFFSET(&dva[d]);
- uint64_t size = DVA_GET_ASIZE(&dva[d]);
-
- if ((vd = vdev_lookup_top(spa, vdev)) == NULL)
- return (ENXIO);
-
- if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
- return (ENXIO);
-
- msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- allocmap = &msp->ms_allocmap[0];
- freemap = &msp->ms_freemap[0];
-
- /* Prepare our copy of the bp in case we need to read GBHs */
- if (DVA_GET_GANG(&dva[d])) {
- size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
- DVA_SET_ASIZE(&blk.blk_dva[d], size);
- DVA_SET_GANG(&blk.blk_dva[d], 0);
- }
-
- mutex_enter(&msp->ms_lock);
- if (space_map_contains(freemap, offset, size)) {
- mutex_exit(&msp->ms_lock);
- return (EAGAIN); /* allocated more than once */
- }
-
- if (!space_map_contains(allocmap, offset, size)) {
- mutex_exit(&msp->ms_lock);
- return (ESTALE); /* not allocated at all */
- }
-
- space_map_remove(allocmap, offset, size);
- space_map_add(freemap, offset, size);
-
- mutex_exit(&msp->ms_lock);
- }
-
- if (BP_IS_GANG(bp)) {
- zio_gbh_phys_t gbh;
- int g;
-
- /* LINTED - compile time assert */
- ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE);
-
- BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER);
- BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE);
- BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE);
- BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF);
- error = zio_wait(zio_read(NULL, spa, &blk, &gbh,
- SPA_GANGBLOCKSIZE, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb));
- if (error)
- return (error);
- if (BP_SHOULD_BYTESWAP(&blk))
- byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE);
- for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
- if (BP_IS_HOLE(&gbh.zg_blkptr[g]))
- break;
- error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb);
- if (error)
- return (error);
- }
- }
-
- return (0);
}
static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+zdb_space_map_unload(space_map_t *sm)
{
- metaslab_t *msp;
-
- /* LINTED */
- msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0]));
+ space_map_vacate(sm, zdb_leak, sm);
+}
- (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
- (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
- (u_longlong_t)start,
- (u_longlong_t)size);
+/* ARGSUSED */
+static void
+zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
+{
}
+static space_map_ops_t zdb_space_map_ops = {
+ zdb_space_map_load,
+ zdb_space_map_unload,
+ NULL, /* alloc */
+ zdb_space_map_claim,
+ NULL /* free */
+};
+
static void
-zdb_space_map_unload(spa_t *spa)
+zdb_leak_init(spa_t *spa)
{
vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m;
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
- for (m = 0; m < vd->vdev_ms_count; m++) {
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
mutex_enter(&msp->ms_lock);
- space_map_vacate(&msp->ms_allocmap[0], zdb_leak,
- &msp->ms_allocmap[0]);
- space_map_unload(&msp->ms_allocmap[0]);
- space_map_vacate(&msp->ms_freemap[0], NULL, NULL);
+ VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
+ SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
+ msp->ms_map.sm_ppd = vd;
mutex_exit(&msp->ms_lock);
}
}
}
static void
-zdb_refresh_ubsync(spa_t *spa)
+zdb_leak_fini(spa_t *spa)
{
- uberblock_t ub = { 0 };
vdev_t *rvd = spa->spa_root_vdev;
- zio_t *zio;
- /*
- * Reload the uberblock.
- */
- zio = zio_root(spa, NULL, NULL,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
- vdev_uberblock_load(zio, rvd, &ub);
- (void) zio_wait(zio);
-
- if (ub.ub_txg != 0)
- spa->spa_ubsync = ub;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
+ }
}
/*
@@ -1371,9 +1463,7 @@ typedef struct zdb_cb {
static void
zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
{
- int i, error;
-
- for (i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
int t = (i & 1) ? type : DMU_OT_TOTAL;
zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
@@ -1384,21 +1474,34 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
zb->zb_count++;
}
- if (dump_opt['L'])
- return;
-
- error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark);
-
- if (error == 0)
- return;
-
- if (error == EAGAIN)
- (void) fatal("double-allocation, bp=%p", bp);
-
- if (error == ESTALE)
- (void) fatal("reference to freed block, bp=%p", bp);
+ if (dump_opt['S']) {
+ boolean_t print_sig;
+
+ print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
+ BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
+
+ if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
+ print_sig = B_FALSE;
+
+ if (print_sig) {
+ (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
+ "%llx:%llx:%llx:%llx\n",
+ (u_longlong_t)BP_GET_LEVEL(bp),
+ (longlong_t)BP_GET_PSIZE(bp),
+ (longlong_t)BP_GET_NDVAS(bp),
+ dmu_ot[BP_GET_TYPE(bp)].ot_name,
+ zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
+ zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
+ (u_longlong_t)bp->blk_cksum.zc_word[0],
+ (u_longlong_t)bp->blk_cksum.zc_word[1],
+ (u_longlong_t)bp->blk_cksum.zc_word[2],
+ (u_longlong_t)bp->blk_cksum.zc_word[3]);
+ }
+ }
- (void) fatal("fatal error %d in bp %p", error, bp);
+ if (!dump_opt['L'])
+ VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
+ NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
}
static int
@@ -1411,9 +1514,16 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
char blkbuf[BP_SPRINTF_LEN];
int error = 0;
+ ASSERT(!BP_IS_HOLE(bp));
+
+ zdb_count_block(spa, zcb, bp, type);
+
if (bc->bc_errno) {
if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
- zdb_refresh_ubsync(spa);
+ uberblock_t ub;
+ vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
+ if (ub.ub_txg != 0)
+ spa->spa_ubsync = ub;
error = EAGAIN;
} else {
zcb->zcb_haderrors = 1;
@@ -1426,35 +1536,32 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
else
blkbuf[0] = '\0';
- (void) printf("zdb_blkptr_cb: Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- %s\n",
- bc->bc_errno,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf,
- error == EAGAIN ? "retrying" : "skipping");
+ if (!dump_opt['S']) {
+ (void) printf("zdb_blkptr_cb: Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- %s\n",
+ bc->bc_errno,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf,
+ error == EAGAIN ? "retrying" : "skipping");
+ }
return (error);
}
zcb->zcb_readfails = 0;
- ASSERT(!BP_IS_HOLE(bp));
-
if (dump_opt['b'] >= 4) {
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
(void) printf("objset %llu object %llu offset 0x%llx %s\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)blkid2offset(bc->bc_dnode,
- zb->zb_level, zb->zb_blkid),
- blkbuf);
+ zb->zb_level, zb->zb_blkid), blkbuf);
}
- zdb_count_block(spa, zcb, bp, type);
-
return (0);
}
@@ -1465,32 +1572,35 @@ dump_block_stats(spa_t *spa)
zdb_cb_t zcb = { 0 };
traverse_blk_cache_t dummy_cache = { 0 };
zdb_blkstats_t *zb, *tzb;
- uint64_t alloc, space;
+ uint64_t alloc, space, logalloc;
+ vdev_t *rvd = spa->spa_root_vdev;
int leaks = 0;
int advance = zdb_advance;
- int flags;
- int e;
+ int c, e, flags;
zcb.zcb_cache = &dummy_cache;
- if (dump_opt['c'])
+ if (dump_opt['c'] || dump_opt['S'])
advance |= ADVANCE_DATA;
advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
- (void) printf("\nTraversing all blocks to %sverify"
- " nothing leaked ...\n",
- dump_opt['c'] ? "verify checksums and " : "");
+ if (!dump_opt['S']) {
+ (void) printf("\nTraversing all blocks to %sverify"
+ " nothing leaked ...\n",
+ dump_opt['c'] ? "verify checksums and " : "");
+ }
/*
- * Load all space maps. As we traverse the pool, if we find a block
- * that's not in its space map, that indicates a double-allocation,
- * reference to a freed block, or an unclaimed block. Otherwise we
- * remove the block from the space map. If the space maps are not
- * empty when we're done, that indicates leaked blocks.
+ * Load all space maps as SM_ALLOC maps, then traverse the pool
+ * claiming each block we discover. If the pool is perfectly
+ * consistent, the space maps will be empty when we're done.
+ * Anything left over is a leak; any block we can't claim (because
+ * it's not part of any space map) is a double allocation,
+ * reference to a freed block, or an unclaimed log block.
*/
if (!dump_opt['L'])
- zdb_space_map_load(spa);
+ zdb_leak_init(spa);
/*
* If there's a deferred-free bplist, process that first.
@@ -1533,7 +1643,7 @@ dump_block_stats(spa_t *spa)
traverse_fini(th);
- if (zcb.zcb_haderrors) {
+ if (zcb.zcb_haderrors && !dump_opt['S']) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
for (e = 0; e < 256; e++) {
@@ -1548,7 +1658,15 @@ dump_block_stats(spa_t *spa)
* Report any leaked segments.
*/
if (!dump_opt['L'])
- zdb_space_map_unload(spa);
+ zdb_leak_fini(spa);
+
+ /*
+ * If we're interested in printing out the blkptr signatures,
+ * return now as we don't print out anything else (including
+ * errors and leaks).
+ */
+ if (dump_opt['S'])
+ return (zcb.zcb_haderrors ? 3 : 0);
if (dump_opt['L'])
(void) printf("\n\n *** Live pool traversal; "
@@ -1557,17 +1675,27 @@ dump_block_stats(spa_t *spa)
alloc = spa_get_alloc(spa);
space = spa_get_space(spa);
+ /*
+ * Log blocks allocated from a separate log device don't count
+ * as part of the normal pool space; factor them in here.
+ */
+ logalloc = 0;
+
+ for (c = 0; c < rvd->vdev_children; c++)
+ if (rvd->vdev_child[c]->vdev_islog)
+ logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
+
tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
- if (tzb->zb_asize == alloc) {
+ if (tzb->zb_asize == alloc + logalloc) {
(void) printf("\n\tNo leaks (block sum matches space"
" maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
"(leaked %lld)\n",
(u_longlong_t)tzb->zb_asize,
- (u_longlong_t)alloc,
- (u_longlong_t)(alloc - tzb->zb_asize));
+ (u_longlong_t)alloc + logalloc,
+ (u_longlong_t)(alloc + logalloc - tzb->zb_asize));
leaks = 1;
}
@@ -1682,11 +1810,11 @@ dump_zpool(spa_t *spa)
dump_dtl(spa->spa_root_vdev, 0);
dump_metaslabs(spa);
}
- (void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL,
+ (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
}
- if (dump_opt['b'] || dump_opt['c'])
+ if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
rc = dump_block_stats(spa);
if (dump_opt['s'])
@@ -1898,12 +2026,12 @@ zdb_read_block(char *thing, spa_t **spap)
zio_t *zio;
vdev_t *vd;
void *buf;
- char *s, *p, *dup, *spa_name, *vdev, *flagstr;
+ char *s, *p, *dup, *pool, *vdev, *flagstr;
int i, error, zio_flags;
dup = strdup(thing);
s = strtok(dup, ":");
- spa_name = s ? s : "";
+ pool = s ? s : "";
s = strtok(NULL, ":");
vdev = s ? s : "";
s = strtok(NULL, ":");
@@ -1953,14 +2081,13 @@ zdb_read_block(char *thing, spa_t **spap)
}
}
- if (spa == NULL || spa->spa_name == NULL ||
- strcmp(spa->spa_name, spa_name)) {
- if (spa && spa->spa_name)
+ if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
+ if (spa)
spa_close(spa, (void *)zdb_read_block);
- error = spa_open(spa_name, spap, (void *)zdb_read_block);
+ error = spa_open(pool, spap, (void *)zdb_read_block);
if (error)
fatal("Failed to open pool '%s': %s",
- spa_name, strerror(error));
+ pool, strerror(error));
spa = *spap;
}
@@ -1980,16 +2107,15 @@ zdb_read_block(char *thing, spa_t **spap)
buf = umem_alloc(size, UMEM_NOFAIL);
zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
- ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK;
-
- if (flags & ZDB_FLAG_PHYS)
- zio_flags |= ZIO_FLAG_PHYSICAL;
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
zio = zio_root(spa, NULL, NULL, 0);
/* XXX todo - cons up a BP so RAID-Z will be happy */
zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
error = zio_wait(zio);
+ spa_config_exit(spa, SCL_STATE, FTAG);
if (error) {
(void) printf("Read of %s failed, error: %d\n", thing, error);
@@ -2014,6 +2140,132 @@ out:
free(dup);
}
+static boolean_t
+nvlist_string_match(nvlist_t *config, char *name, char *tgt)
+{
+ char *s;
+
+ if (nvlist_lookup_string(config, name, &s) != 0)
+ return (B_FALSE);
+
+ return (strcmp(s, tgt) == 0);
+}
+
+static boolean_t
+nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
+{
+ uint64_t val;
+
+ if (nvlist_lookup_uint64(config, name, &val) != 0)
+ return (B_FALSE);
+
+ return (val == tgt);
+}
+
+static boolean_t
+vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
+{
+ nvlist_t **child;
+ uint_t c, children;
+
+ verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0);
+ for (c = 0; c < children; ++c)
+ if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+static boolean_t
+vdev_child_string_match(nvlist_t *vdev, char *tgt)
+{
+ nvlist_t **child;
+ uint_t c, children;
+
+ verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0);
+ for (c = 0; c < children; ++c) {
+ if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
+ nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+static boolean_t
+vdev_guid_match(nvlist_t *config, uint64_t guid)
+{
+ nvlist_t *nvroot;
+
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+
+ return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
+ vdev_child_guid_match(nvroot, guid));
+}
+
+static boolean_t
+vdev_string_match(nvlist_t *config, char *tgt)
+{
+ nvlist_t *nvroot;
+
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+
+ return (vdev_child_string_match(nvroot, tgt));
+}
+
+static boolean_t
+pool_match(nvlist_t *config, char *tgt)
+{
+ uint64_t guid = strtoull(tgt, NULL, 0);
+
+ if (guid != 0) {
+ return (
+ nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
+ vdev_guid_match(config, guid));
+ } else {
+ return (
+ nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
+ vdev_string_match(config, tgt));
+ }
+}
+
+static int
+find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
+{
+ nvlist_t *pools;
+ int error = ENOENT;
+ nvlist_t *match = NULL;
+
+ if (vdev_dir != NULL)
+ pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
+ else
+ pools = zpool_find_import_activeok(g_zfs, 0, NULL);
+
+ if (pools != NULL) {
+ nvpair_t *elem = NULL;
+
+ while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+ verify(nvpair_value_nvlist(elem, configp) == 0);
+ if (pool_match(*configp, pool_id)) {
+ if (match != NULL) {
+ (void) fatal(
+ "More than one matching pool - "
+ "specify guid/devid/device path.");
+ } else {
+ match = *configp;
+ error = 0;
+ }
+ }
+ }
+ }
+
+ *configp = error ? NULL : match;
+
+ return (error);
+}
+
int
main(int argc, char **argv)
{
@@ -2026,13 +2278,15 @@ main(int argc, char **argv)
int verbose = 0;
int error;
int flag, set;
+ int exported = 0;
+ char *vdev_dir = NULL;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "udibcsvCLO:B:UlR")) != -1) {
+ while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) {
switch (c) {
case 'u':
case 'd':
@@ -2093,7 +2347,31 @@ main(int argc, char **argv)
verbose++;
break;
case 'U':
- spa_config_dir = "/tmp";
+ spa_config_path = optarg;
+ break;
+ case 'e':
+ exported = 1;
+ break;
+ case 'p':
+ vdev_dir = optarg;
+ break;
+ case 'S':
+ dump_opt[c]++;
+ dump_all = 0;
+ zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
+ if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
+ usage();
+ endstr = strchr(optarg, ':') + 1;
+ if (strcmp(endstr, "fletcher2") == 0)
+ zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
+ else if (strcmp(endstr, "fletcher4") == 0)
+ zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
+ else if (strcmp(endstr, "sha256") == 0)
+ zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
+ else if (strcmp(endstr, "all") == 0)
+ zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
+ else
+ usage();
break;
default:
usage();
@@ -2101,7 +2379,12 @@ main(int argc, char **argv)
}
}
+ if (vdev_dir != NULL && exported == 0)
+ (void) fatal("-p option requires use of -e\n");
+
kernel_init(FREAD);
+ g_zfs = libzfs_init();
+ ASSERT(g_zfs != NULL);
/*
* Disable vdev caching. If we don't do this, live pool traversal
@@ -2121,7 +2404,7 @@ main(int argc, char **argv)
if (argc < 1) {
if (dump_opt['C']) {
- dump_config(NULL);
+ dump_cachefile(spa_config_path);
return (0);
}
usage();
@@ -2156,11 +2439,48 @@ main(int argc, char **argv)
if (dump_opt['C'])
dump_config(argv[0]);
- if (strchr(argv[0], '/') != NULL) {
- error = dmu_objset_open(argv[0], DMU_OST_ANY,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
- } else {
- error = spa_open(argv[0], &spa, FTAG);
+ error = 0;
+ if (exported) {
+ /*
+ * Check to see if the name refers to an exported zpool
+ */
+ char *slash;
+ nvlist_t *exported_conf = NULL;
+
+ if ((slash = strchr(argv[0], '/')) != NULL)
+ *slash = '\0';
+
+ error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
+ if (error == 0) {
+ nvlist_t *nvl = NULL;
+
+ if (vdev_dir != NULL) {
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
+ error = ENOMEM;
+ else if (nvlist_add_string(nvl,
+ zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
+ vdev_dir) != 0)
+ error = ENOMEM;
+ }
+
+ if (error == 0)
+ error = spa_import_faulted(argv[0],
+ exported_conf, nvl);
+
+ nvlist_free(nvl);
+ }
+
+ if (slash != NULL)
+ *slash = '/';
+ }
+
+ if (error == 0) {
+ if (strchr(argv[0], '/') != NULL) {
+ error = dmu_objset_open(argv[0], DMU_OST_ANY,
+ DS_MODE_USER | DS_MODE_READONLY, &os);
+ } else {
+ error = spa_open(argv[0], &spa, FTAG);
+ }
}
if (error)
@@ -2187,6 +2507,9 @@ main(int argc, char **argv)
spa_close(spa, FTAG);
}
+ fuid_table_destroy();
+
+ libzfs_fini(g_zfs);
kernel_fini();
return (0);
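
Note: the dump_uidgid()/print_idstr() additions above assume the usual OpenSolaris FUID packing, a 64-bit id whose upper 32 bits index the per-dataset domain table and whose lower 32 bits carry the Windows RID, with a zero index meaning a plain POSIX uid/gid. The FUID_INDEX()/FUID_RID() names appear in the patch itself; the 32/32 bit-split definitions below are an assumption for illustration only (the canonical ones live in sys/zfs_fuid.h). A minimal decoding sketch under that assumption:

#include <stdio.h>
#include <inttypes.h>

/* Assumed layout: domain-table index in the upper 32 bits, RID in the lower 32. */
#define	FUID_INDEX(id)	((uint64_t)(id) >> 32)
#define	FUID_RID(id)	((uint64_t)(id) & 0xffffffff)

static void
print_fuid(uint64_t id)
{
	if (FUID_INDEX(id) != 0) {
		/* CIFS-style id: raw value plus its index/RID breakdown. */
		(void) printf("fuid %" PRIx64 " [domain#%" PRIu64 "-%" PRIu64 "]\n",
		    id, FUID_INDEX(id), FUID_RID(id));
	} else {
		/* Plain POSIX uid/gid. */
		(void) printf("id %" PRIu64 "\n", id);
	}
}
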
diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
index 10dfe20..02d35a0 100644
--- a/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
+++ b/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -233,19 +233,26 @@ typedef struct zil_rec_info {
} zil_rec_info_t;
static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
- { NULL, "Total " },
- { zil_prt_rec_create, "TX_CREATE " },
- { zil_prt_rec_create, "TX_MKDIR " },
- { zil_prt_rec_create, "TX_MKXATTR " },
- { zil_prt_rec_create, "TX_SYMLINK " },
- { zil_prt_rec_remove, "TX_REMOVE " },
- { zil_prt_rec_remove, "TX_RMDIR " },
- { zil_prt_rec_link, "TX_LINK " },
- { zil_prt_rec_rename, "TX_RENAME " },
- { zil_prt_rec_write, "TX_WRITE " },
- { zil_prt_rec_truncate, "TX_TRUNCATE" },
- { zil_prt_rec_setattr, "TX_SETATTR " },
- { zil_prt_rec_acl, "TX_ACL " },
+ { NULL, "Total " },
+ { zil_prt_rec_create, "TX_CREATE " },
+ { zil_prt_rec_create, "TX_MKDIR " },
+ { zil_prt_rec_create, "TX_MKXATTR " },
+ { zil_prt_rec_create, "TX_SYMLINK " },
+ { zil_prt_rec_remove, "TX_REMOVE " },
+ { zil_prt_rec_remove, "TX_RMDIR " },
+ { zil_prt_rec_link, "TX_LINK " },
+ { zil_prt_rec_rename, "TX_RENAME " },
+ { zil_prt_rec_write, "TX_WRITE " },
+ { zil_prt_rec_truncate, "TX_TRUNCATE " },
+ { zil_prt_rec_setattr, "TX_SETATTR " },
+ { zil_prt_rec_acl, "TX_ACL_V0 " },
+ { zil_prt_rec_acl, "TX_ACL_ACL " },
+ { zil_prt_rec_create, "TX_CREATE_ACL " },
+ { zil_prt_rec_create, "TX_CREATE_ATTR " },
+ { zil_prt_rec_create, "TX_CREATE_ACL_ATTR " },
+ { zil_prt_rec_create, "TX_MKDIR_ACL " },
+ { zil_prt_rec_create, "TX_MKDIR_ATTR " },
+ { zil_prt_rec_create, "TX_MKDIR_ACL_ATTR " },
};
/* ARGSUSED */
@@ -255,12 +262,14 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
int txtype;
int verbose = MAX(dump_opt['d'], dump_opt['i']);
+ /* reduce size of txtype to strip off TX_CI bit */
txtype = lr->lrc_txtype;
ASSERT(txtype != 0 && (uint_t)txtype < TX_MAX_TYPE);
ASSERT(lr->lrc_txg);
- (void) printf("\t\t%s len %6llu, txg %llu, seq %llu\n",
+ (void) printf("\t\t%s%s len %6llu, txg %llu, seq %llu\n",
+ (lr->lrc_txtype & TX_CI) ? "CI-" : "",
zil_rec_info[txtype].zri_name,
(u_longlong_t)lr->lrc_reclen,
(u_longlong_t)lr->lrc_txg,
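
Note: the print_log_record() hunk above relies on the assignment to an int to narrow lrc_txtype, which drops the TX_CI flag before the value indexes zil_rec_info[]; the patch's own comment calls this out, and the "CI-" prefix is printed by testing the flag directly. Assuming TX_CI is the high-order flag bit of the 64-bit txtype (its canonical definition lives in sys/zil.h), an explicit equivalent of that narrowing is sketched here:

#include <stdint.h>

/* Assumed flag position; see sys/zil.h for the canonical definition. */
#define	TX_CI	((uint64_t)0x1 << 63)	/* case-insensitive request */

/* Split a raw ZIL txtype into its table index and its TX_CI flag. */
static int
zil_txtype_index(uint64_t raw_txtype, int *ci)
{
	*ci = (raw_txtype & TX_CI) != 0;
	return ((int)(raw_txtype & ~TX_CI));
}
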
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
index d49cb87..7217fe8 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8
@@ -18,7 +18,7 @@
.\"
.\" CDDL HEADER END
.\" Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
-.TH zfs 1M "16 Mar 2007" "SunOS 5.11" "System Administration Commands"
+.TH zfs 1M "8 Apr 2008" "SunOS 5.11" "System Administration Commands"
.SH NAME
zfs \- configures ZFS file systems
.SH SYNOPSIS
@@ -29,12 +29,12 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBcreate\fR [[\fB-o\fR property=\fIvalue\fR]]... \fIfilesystem\fR
+\fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR
.fi
.LP
.nf
-\fBzfs\fR \fBcreate\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=\fIvalue\fR]]... \fB-V\fR \fIsize\fR \fIvolume\fR
+\fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR
.fi
.LP
@@ -44,35 +44,44 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBclone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR
+\fBzfs\fR \fBsnapshot\fR [\fB-r\fR] \fIfilesystem@snapname\fR|\fIvolume@snapname\fR
.fi
.LP
.nf
-\fBzfs\fR \fBpromote\fR \fIfilesystem\fR
+\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBclone\fR [\fB-p\fR] \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR
.fi
.LP
.nf
\fBzfs\fR \fBrename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
- [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]
+ \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBsnapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR
+\fBzfs\fR \fBrename\fR [\fB-p\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
.nf
-\fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR
+\fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR] ]... [ \fB-t\fR \fItype\fR[,\fItype\fR]...]
- [ \fB-s\fR \fIprop\fR [\fB-s\fR \fIprop\fR]... [ \fB-S\fR \fIprop\fR [\fB-S\fR \fIprop\fR]...
- [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|.\fI/pathname\fR ...
+\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fB-t\fR \fItype\fR[,...]]
+ [\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ...
.fi
.LP
@@ -82,14 +91,23 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBget\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...]
- [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]...
+\fBzfs\fR \fBget\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...]
\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...
.fi
.LP
.nf
-\fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR... ...
+\fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR ...
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBupgrade\fR [\fB-v\fR]
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR
.fi
.LP
@@ -99,58 +117,81 @@ zfs \- configures ZFS file systems
.LP
.nf
-\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fB-a\fR
+\fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR
.fi
.LP
.nf
-\fBzfs\fR \fBmount\fR [\fB-o \fIoptions\fR\fR] [\fB-O\fR] \fIfilesystem\fR
+\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR
.fi
.LP
.nf
-\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB-a\fR
+\fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR
.fi
.LP
.nf
-\fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
+\fBzfs\fR \fBunshare\fR \fB-a\fR \fIfilesystem\fR|\fImountpoint\fR
.fi
.LP
.nf
-\fBzfs\fR \fBshare\fR \fB-a\fR
+\fBzfs\fR \fBsend\fR [\fB-vR\fR] [\fB-\fR[\fB-iI\fR] \fIsnapshot\fR] \fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBshare\fR \fIfilesystem\fR
+\fBzfs\fR \fBreceive\fR [\fB-vnF\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.fi
.LP
.nf
-\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB-a\fR
+\fBzfs\fR \fBreceive\fR [\fB-vnF\fR] \fB-d\fR \fIfilesystem\fR
.fi
.LP
.nf
-\fBzfs\fR \fBunshare\fR [\fB-f\fR] \fB\fIfilesystem\fR|\fImountpoint\fR\fR
+\fBzfs\fR \fBallow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|\fI@setname\fR[,...]
+ \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
.nf
-\fBzfs\fR \fBsend\fR [\fB-i\fR \fIsnapshot1\fR] \fB\fIsnapshot2\fR\fR
+\fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
+\fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
.fi
.LP
.nf
-\fBzfs\fR \fBreceive\fR [\fB-vnF\fR ] \fB-d\fR \fB\fIfilesystem\fR\fR
+\fBzfs\fR \fBallow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR
.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[,... ]]
+ \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[ ... ]] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
+.LP
+.nf
+\fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR
+.fi
+
.LP
.nf
\fBzfs\fR \fBjail\fR \fBjailid\fR \fB\fIfilesystem\fR\fR
@@ -161,6 +202,7 @@ zfs \- configures ZFS file systems
.fi
.SH DESCRIPTION
+.sp
.LP
The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(1M). A
dataset is identified by a unique path within the \fBZFS\fR namespace. For example:
@@ -172,8 +214,10 @@ pool/{filesystem,volume,snapshot}
.in -2
.sp
+.sp
.LP
where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes).
+.sp
.LP
A dataset can be one of the following:
.sp
@@ -210,65 +254,88 @@ A read-only version of a file system or volume at a given point in time. It is s
.RE
.SS "ZFS File System Hierarchy"
+.sp
.LP
A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy.
+.sp
.LP
The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(1M) command.
+.sp
.LP
See \fBzpool\fR(1M) for more information on creating and administering pools.
.SS "Snapshots"
+.sp
.LP
A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset.
+.sp
.LP
Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently.
+.sp
.LP
File system snapshots can be accessed under the ".zfs/snapshot" directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the ".zfs" directory can be controlled by the "snapdir"
property.
.SS "Clones"
+.sp
.LP
A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space.
+.sp
.LP
Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The "origin"
property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist.
+.sp
.LP
The clone parent-child dependency relationship can be reversed by using the "\fBpromote\fR" subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone
was created from.
.SS "Mount Points"
+.sp
.LP
-Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system will likely be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/vfstab\fR file.
-All automatically managed file systems are mounted by \fBZFS\fR at boot time.
+Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/vfstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time.
+.sp
.LP
By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed.
+.sp
.LP
A file system can also have a mount point set in the "mountpoint" property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the "\fBzfs mount -a\fR" command is invoked (without editing \fB/etc/vfstab\fR). The mountpoint property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR.
+.sp
.LP
A file system mountpoint property of "none" prevents the file system from being mounted.
+.sp
.LP
If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to "legacy", \fBZFS\fR makes no attempt to manage
the file system, and the administrator is responsible for mounting and unmounting the file system.
.SS "Zones"
+.sp
.LP
A \fBZFS\fR file system can be added to a non-global zone by using zonecfg's "\fBadd fs\fR" subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its mountpoint property set to legacy.
+.sp
.LP
The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted.
+.sp
.LP
A dataset can also be delegated to a non-global zone by using zonecfg's "\fBadd dataset\fR" subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or
any of its children. However, the "quota" property is controlled by the global administrator.
+.sp
.LP
A \fBZFS\fR volume can be added as a device to a non-global zone by using zonecfg's "\fBadd device\fR" subcommand. However, its physical properties can only be modified by the global administrator.
+.sp
.LP
For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M).
+.sp
.LP
After a dataset is delegated to a non-global zone, the "zoned" property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value.
+.sp
.LP
The global administrator can forcibly clear the "zoned" property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property.
.SS "Native Properties"
+.sp
.LP
Properties are divided into two types, native properties and user defined properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior,
but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section.
+.sp
.LP
Every dataset has a set of properties that export statistics about the dataset as well as control various behavior. Properties are inherited from the parent unless overridden by the child. Snapshot properties can not be edited; they always inherit their inheritable properties. Properties
that are not applicable to snapshots are not displayed.
+.sp
.LP
The values of numeric properties can be specified using the following human-readable suffixes (for example, "k", "KB", "M", "Gb", etc, up to Z for zettabyte). The following are all valid (and equal) specifications:
.sp
@@ -279,19 +346,35 @@ The values of numeric properties can be specified using the following human-read
.in -2
.sp
+.sp
.LP
-The values of non-numeric properties are case sensitive and must be lowercase, except for "mountpoint" and "sharenfs".
+The values of non-numeric properties are case sensitive and must be lowercase, except for "mountpoint", "sharenfs" and "sharesmb".
+.sp
.LP
-The first set of properties consist of read-only statistics about the dataset. These properties cannot be set, nor are they inherited. Native properties apply to all dataset types unless otherwise noted.
+The following native properties consist of read-only statistics about the dataset. These properties cannot be set, nor are they inherited. Native properties apply to all dataset types unless otherwise noted.
.sp
.ne 2
.mk
.na
-\fBtype\fR
+\fBavailable\fR
.ad
-.RS 17n
-.rt
-The type of dataset: "filesystem", "volume", "snapshot", or "clone".
+.sp .6
+.RS 4n
+The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets
+within the pool.
+.sp
+This property can also be referred to by its shortened column name, "avail".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcompressratio\fR
+.ad
+.sp .6
+.RS 4n
+The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running "zfs set compression=on \fIdataset\fR". The default value is "off".
.RE
.sp
@@ -300,8 +383,8 @@ The type of dataset: "filesystem", "volume", "snapshot", or "clone".
.na
\fBcreation\fR
.ad
-.RS 17n
-.rt
+.sp .6
+.RS 4n
The time this dataset was created.
.RE
@@ -309,31 +392,22 @@ The time this dataset was created.
.ne 2
.mk
.na
-\fBused\fR
+\fBmounted\fR
.ad
-.RS 17n
-.rt
-The amount of space consumed by this dataset and all its descendants. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendant datasets.
-The amount of space that a dataset consumes from its parent, as well as the amount of space that will be freed if this dataset is recursively destroyed, is the greater of its space used and its reservation.
-.sp
-When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in
-the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots.
-.sp
-The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(3c) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
+.sp .6
+.RS 4n
+For file systems, indicates whether the file system is currently mounted. This property can be either "yes" or "no".
.RE
.sp
.ne 2
.mk
.na
-\fBavailable\fR
+\fBorigin\fR
.ad
-.RS 17n
-.rt
-The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets
-within the pool.
-.sp
-This property can also be referred to by its shortened column name, "avail".
+.sp .6
+.RS 4n
+For cloned file systems or volumes, the snapshot from which the clone was created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR options) so long as a clone exists.
.RE
.sp
@@ -342,8 +416,8 @@ This property can also be referred to by its shortened column name, "avail".
.na
\fBreferenced\fR
.ad
-.RS 17n
-.rt
+.sp .6
+.RS 4n
The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are
identical.
.sp
@@ -354,116 +428,166 @@ This property can also be referred to by its shortened column name, "refer".
.ne 2
.mk
.na
-\fBcompressratio\fR
+\fBtype\fR
.ad
-.RS 17n
-.rt
-The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running "zfs set compression=on \fIdataset\fR". The default value is "off".
+.sp .6
+.RS 4n
+The type of dataset: "filesystem", "volume", "snapshot", or "clone".
.RE
.sp
.ne 2
.mk
.na
-\fBmounted\fR
+\fBused\fR
.ad
-.RS 17n
-.rt
-For file systems, indicates whether the file system is currently mounted. This property can be either "yes" or "no".
+.sp .6
+.RS 4n
+The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets.
+The amount of space that a dataset consumes from its parent, as well as the amount of space that is freed if this dataset is recursively destroyed, is the greater of its space used and its reservation.
+.sp
+When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in
+the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots.
+.sp
+The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(3c) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately.
.RE
.sp
.ne 2
.mk
.na
-\fBorigin\fR
+\fBvolblocksize=\fIblocksize\fR\fR
.ad
-.RS 17n
-.rt
-For cloned file systems or volumes, the snapshot from which the clone was created. The origin cannot be destroyed (even with the \fB-r\fR or \fB-f\fR options) so long as a clone exists.
+.sp .6
+.RS 4n
+For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes
+to 128 Kbytes is valid.
+.sp
+This property can also be referred to by its shortened column name, "volblock".
.RE
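+.sp
+As a hypothetical illustration (the volume name "pool/vol" and the sizes are not taken from this page), a volume with a 64-Kbyte block size could be created at creation time with:
+.sp
+.in +2
+.nf
+# \fBzfs create -b 64K -V 10G pool/vol\fR  # illustrative names and sizes
+.fi
+.in -2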
+.sp
.LP
-The following two properties can be set to control the way space is allocated between datasets. These properties are not inherited, but do affect their descendants.
+The following native properties can be used to change the behavior of a \fBZFS\fR dataset.
.sp
.ne 2
.mk
.na
-\fBquota=\fIsize\fR | \fInone\fR\fR
+\fBaclinherit=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR\fR
.ad
.sp .6
.RS 4n
-Limits the amount of space a dataset and its descendants can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendants, including file systems and snapshots. Setting a quota on a descendant of a dataset that already
-has a quota does not override the ancestor's quota, but rather imposes an additional limit.
+Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an "aclinherit" property of "\fBdiscard\fR" does not inherit any \fBACL\fR entries. A file system with an "aclinherit"
+property value of "\fBnoallow\fR" only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value "\fBrestricted\fR" (the default) removes the "\fBwrite_acl\fR" and "\fBwrite_owner\fR" permissions when the \fBACL\fR entry is inherited. A file system with an "aclinherit" property value of "\fBpassthrough\fR" inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited.
.sp
-Quotas cannot be set on volumes, as the "volsize" property acts as an implicit quota.
+When the property value is set to "\fBpassthrough\fR," files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance with the mode requested by
+the application.
.RE
.sp
.ne 2
.mk
.na
-\fBreservation=\fIsize\fR | \fInone\fR\fR
+\fBaclmode=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
.ad
.sp .6
.RS 4n
-The minimum amount of space guaranteed to a dataset and its descendants. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space
-used, and count against the parent datasets' quotas and reservations.
+Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an "aclmode" property of "\fBdiscard\fR"
+deletes all \fBACL\fR entries that do not represent the mode of the file. An "aclmode" property of "\fBgroupmask\fR" (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission
+bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an "aclmode" property of "\fBpassthrough\fR" indicates that no changes are made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
+.RE
+
.sp
-This property can also be referred to by its shortened column name, "reserv".
+.ne 2
+.mk
+.na
+\fBatime=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value
+is "on".
.RE
.sp
.ne 2
.mk
.na
-\fBvolsize=\fIsize\fR\fR
+\fBcanmount=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR
.ad
.sp .6
.RS 4n
-For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. Any changes to \fBvolsize\fR are reflected in an equivalent change to the reservation. The \fBvolsize\fR can only be set to a
-multiple of \fBvolblocksize\fR, and cannot be zero.
+If this property is set to "\fBoff\fR", the file system cannot be mounted, and is ignored by "\fBzfs mount -a\fR". Setting this property to "\fBoff\fR" is similar to setting the "mountpoint"
+property to "\fBnone\fR", except that the dataset still has a normal "mountpoint" property, which can be inherited. Setting this property to "\fBoff\fR" allows datasets to be used solely as a mechanism to inherit properties. One example
+of setting canmount=\fBoff\fR is to have two datasets with the same mountpoint, so that the children of both datasets appear in the same directory, but might have different inherited characteristics.
.sp
-The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when
-the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size.
+When the "\fBnoauto\fR" option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the "\fBzfs mount -a\fR" command or unmounted
+by the "\fBzfs unmount -a\fR" command.
.sp
-Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the "\fBzfs create -V\fR" command, or by changing the reservation after the volume has been created.
-A "sparse volume" is a volume where the reservation is less then the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
+This property is not inherited.
.RE
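+.sp
+For instance, the following sketch (all pool and dataset names are illustrative) uses canmount=off so that the children of two separate datasets appear under the same directory:
+.sp
+.in +2
+.nf
+# \fBzfs create -o canmount=off -o mountpoint=/export/home pool/accounting\fR
+# \fBzfs create -o canmount=off -o mountpoint=/export/home pool/engineering\fR
+# \fBzfs create pool/accounting/alice\fR    # mounts at /export/home/alice
+# \fBzfs create pool/engineering/bob\fR     # mounts at /export/home/bob
+.fi
+.in -2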
.sp
.ne 2
.mk
.na
-\fBvolblocksize=\fIblocksize\fR\fR
+\fBchecksum=\fIon\fR | \fIoff\fR | \fIfletcher2\fR | \fIfletcher4\fR | \fIsha256\fR\fR
.ad
.sp .6
.RS 4n
-For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes
-to 128 Kbytes is valid.
-.sp
-This property can also be referred to by its shortened column name, "volblock".
+Controls the checksum used to verify data integrity. The default value is "on", which automatically selects an appropriate algorithm (currently, \fIfletcher2\fR, but this may change in future releases). The value "off" disables integrity
+checking on user data. Disabling checksums is NOT a recommended practice.
.RE
.sp
.ne 2
.mk
.na
-\fBrecordsize=\fIsize\fR\fR
+\fBcompression=\fIon\fR | \fIoff\fR | \fIlzjb\fR | \fIgzip\fR | \fIgzip-N\fR\fR
.ad
.sp .6
.RS 4n
-Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical
-access patterns.
+Controls the compression algorithm used for this dataset. The "lzjb" compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip"
+compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR" where \fIN\fR is
+an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
.sp
-For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a "recordsize" greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general
-purpose file systems is strongly discouraged, and may adversely affect performance.
+This property can also be referred to by its shortened column name "compress".
+.RE
+
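+.sp
+For example (the dataset name "pool/home" is illustrative), gzip compression at its default level could be enabled with:
+.sp
+.in +2
+.nf
+# \fBzfs set compression=gzip-6 pool/home\fR  # hypothetical dataset
+.fi
+.in -2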
.sp
-The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes.
+.ne 2
+.mk
+.na
+\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or raid-z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated
+file and dataset, changing the "used" property and counting against quotas and reservations.
.sp
-Changing the file system's \fBrecordsize\fR only affects files created afterward; existing files are unaffected.
+Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the "\fB-o\fR copies=" option.
+.RE
+
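+.sp
+For example (the name "pool/important" is illustrative), a file system that stores two copies of each block could be created with:
+.sp
+.in +2
+.nf
+# \fBzfs create -o copies=2 pool/important\fR  # hypothetical dataset
+.fi
+.in -2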
.sp
-This property can also be referred to by its shortened column name, "recsize".
+.ne 2
+.mk
+.na
+\fBdevices=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether device nodes can be opened on this file system. The default value is "on".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBexec=\fIon\fR | \fIoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether processes can be executed from within this file system. The default value is "on".
.RE
.sp
@@ -484,89 +608,100 @@ previously "legacy" or "none", or if they were mounted before the property was c
.ne 2
.mk
.na
-\fBsharenfs=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
+\fBnbmand=\fIon\fR | \fIoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a sharenfs property of "off" is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the "\fBzfs share\fR" and "\fBzfs unshare\fR" commands. If the property is set to "on", the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
-.sp
-When the "sharenfs" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is "off",
-the file systems are unshared.
+Controls whether the file system should be mounted with "\fBnbmand\fR" (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is unmounted and remounted. See \fBmount\fR(1M) for more information on "\fBnbmand\fR" mounts.
.RE
.sp
.ne 2
.mk
.na
-\fBshareiscsi=\fIon\fR | \fIoff\fR\fR
+\fBquota=\fIsize\fR | \fInone\fR\fR
.ad
.sp .6
.RS 4n
-Like the "sharenfs" property, "shareiscsi" indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are "on", "off", and "type=disk".
-The default value is "off". In the future, other target types might be supported. For example, "tape".
+Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already
+has a quota does not override the ancestor's quota, but rather imposes an additional limit.
.sp
-You might want to set "shareiscsi=on" for a file system so that all \fBZFS\fR volumes within the file system are shared by default. Setting this property on a file system has no direct effect, however.
+Quotas cannot be set on volumes, as the "volsize" property acts as an implicit quota.
.RE
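+.sp
+For example, assuming a file system "pool/home/alice" (the name is illustrative), a 10-Gbyte hard limit could be imposed with:
+.sp
+.in +2
+.nf
+# \fBzfs set quota=10G pool/home/alice\fR  # hypothetical dataset
+.fi
+.in -2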
.sp
.ne 2
.mk
.na
-\fBchecksum=\fIon\fR | \fIoff\fR | \fIfletcher2\fR, | \fIfletcher4\fR | \fIsha256\fR\fR
+\fBreadonly=\fIon\fR | \fIoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls the checksum used to verify data integrity. The default value is "on", which automatically selects an appropriate algorithm (currently, \fIfletcher2\fR, but this may change in future releases). The value "off" disables integrity
-checking on user data. Disabling checksums is NOT a recommended practice.
+Controls whether this dataset can be modified. The default value is "off".
+.sp
+This property can also be referred to by its shortened column name, "rdonly".
.RE
.sp
.ne 2
.mk
.na
-\fBcompression=\fIon\fR | \fIoff\fR | \fIlzjb\fR | \fIgzip\fR | \fIgzip-N\fR\fR
+\fBrecordsize=\fIsize\fR\fR
.ad
.sp .6
.RS 4n
-Controls the compression algorithm used for this dataset. The "lzjb" compression algorithm is optimized for performance while providing decent data compression. Setting compression to "on" uses the "lzjb" compression algorithm. The "gzip"
-compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the "gzip" level by using the value "gzip-\fIN\fR",
-where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, "gzip" is equivalent to "gzip-6" (which is also the default for \fBgzip\fR(1)).
+Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical
+access patterns.
.sp
-This property can also be referred to by its shortened column name "compress".
+For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a "recordsize" greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general
+purpose file systems is strongly discouraged, and may adversely affect performance.
+.sp
+The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes.
+.sp
+Changing the file system's \fBrecordsize\fR only affects files created afterward; existing files are unaffected.
+.sp
+This property can also be referred to by its shortened column name, "recsize".
.RE
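+.sp
+As an illustration (the dataset name "pool/db" is hypothetical), a file system holding a database that uses fixed 8-Kbyte records could be tuned with:
+.sp
+.in +2
+.nf
+# \fBzfs set recordsize=8K pool/db\fR  # hypothetical dataset
+.fi
+.in -2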
.sp
.ne 2
.mk
.na
-\fBatime=\fIon\fR | \fIoff\fR\fR
+\fBrefquota=\fIsize\fR | \fInone\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value
-is "on".
+Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots.
.RE
.sp
.ne 2
.mk
.na
-\fBdevices=\fIon\fR | \fIoff\fR\fR
+\fBrefreservation=\fIsize\fR | \fInone\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether device nodes can be opened on this file system. The default value is "on".
+The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation
+is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations.
+.sp
+If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset.
+.sp
+This property can also be referred to by its shortened column name, "refreserv".
.RE
.sp
.ne 2
.mk
.na
-\fBexec=\fIon\fR | \fIoff\fR\fR
+\fBreservation=\fIsize\fR | \fInone\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether processes can be executed from within this file system. The default value is "on".
+The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space
+used, and count against the parent datasets' quotas and reservations.
+.sp
+This property can also be referred to by its shortened column name, "reserv".
.RE
.sp
@@ -584,24 +719,48 @@ Controls whether the set-\fBUID\fR bit is respected for the file system. The def
.ne 2
.mk
.na
-\fBreadonly=\fIon\fR | \fIoff\fR\fR
+\fBshareiscsi=\fIon\fR | \fIoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether this dataset can be modified. The default value is "off".
+Like the "sharenfs" property, "shareiscsi" indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are "on", "off", and "type=disk".
+The default value is "off". In the future, other target types might be supported. For example, "tape".
.sp
-This property can also be referred to by its shortened column name, "rdonly".
+You might want to set "shareiscsi=on" for a file system so that all \fBZFS\fR volumes within the file system are shared by default. Setting this property on a file system has no direct effect, however.
.RE
.sp
.ne 2
.mk
.na
-\fBzoned=\fIon\fR | \fIoff\fR\fR
+\fBsharesmb=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
.ad
.sp .6
.RS 4n
-Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is "off".
+Controls whether the file system is shared by using the Solaris \fBCIFS\fR service, and what options are to be used. A file system with the "\fBsharesmb\fR" property set to "off" is managed through traditional tools such as \fBsharemgr\fR(1M). Otherwise, the file system is automatically shared and unshared with the "zfs share" and "zfs unshare" commands. If the property is set to "on",
+the \fBsharemgr\fR(1M) command is invoked with no options. Otherwise, the \fBsharemgr\fR(1M) command is invoked with options equivalent to the contents of this property.
+.sp
+Because \fBSMB\fR shares require a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore
+(_) characters. A pseudo property "name" is also supported that allows you to replace the dataset name with a specified name. The specified name is then used to replace the prefix dataset in the case of inheritance. For example, if the dataset "\fBdata/home/john\fR"
+is set to "name=john", then "\fBdata/home/john\fR" has a resource name of "john". If a child dataset "\fBdata/home/john/backups\fR" exists, it has a resource name of "john_backups".
+.sp
+When the "sharesmb" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously set to "off", or if they were shared before the property was changed. If the new property
+is set to "off", the file systems are unshared.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBsharenfs=\fIon\fR | \fIoff\fR | \fIopts\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a "\fBsharenfs\fR" property of "off" is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the "\fBzfs share\fR" and "\fBzfs unshare\fR" commands. If the property is set to "on",
+the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property.
+.sp
+When the "sharenfs" property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously "off", or if they were shared before the property was changed. If the new property is "off",
+the file systems are unshared.
.RE
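+.sp
+For example (the name "pool/export/home" is illustrative), a file system could be shared read-only over \fBNFS\fR with:
+.sp
+.in +2
+.nf
+# \fBzfs set sharenfs=ro pool/export/home\fR  # hypothetical dataset
+.fi
+.in -2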
.sp
@@ -619,40 +778,40 @@ Controls whether the ".zfs" directory is hidden or visible in the root of the fi
.ne 2
.mk
.na
-\fBaclmode=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR
+\fBversion=\fB1\fR|\fB2\fR|\fBcurrent\fR\fR
.ad
.sp .6
.RS 4n
-Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an "aclmode" property of "\fBdiscard\fR"
-deletes all \fBACL\fR entries that do not represent the mode of the file. An "aclmode" property of "\fBgroupmask\fR" (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission
-bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an "aclmode" property of "\fBpassthrough\fR" indicates that no changes will be made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory.
+The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See "\fBzfs upgrade\fR".
.RE
.sp
.ne 2
.mk
.na
-\fBaclinherit=\fBdiscard\fR | \fBnoallow\fR | \fBsecure\fR | \fBpassthrough\fR\fR
+\fBvolsize=\fIsize\fR\fR
.ad
.sp .6
.RS 4n
-Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an "aclinherit" property of "\fBdiscard\fR" does not inherit any \fBACL\fR entries. A file system with an "aclinherit"
-property value of "\fBnoallow\fR" only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value "\fBsecure\fR" (the default) removes the "\fBwrite_acl\fR" and "\fBwrite_owner\fR" permissions when the \fBACL\fR entry is inherited. A file system with an "aclinherit" property value of "\fBpassthrough\fR" inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited.
+For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. For storage pools with a version number of 9 or higher, a \fBrefreservation\fR is set instead. Any changes to \fBvolsize\fR are
+reflected in an equivalent change to the reservation (or \fBrefreservation\fR). The \fBvolsize\fR can only be set to a multiple of \fBvolblocksize\fR, and cannot be zero.
+.sp
+The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when
+the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size.
+.sp
+Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the "\fBzfs create -V\fR" command, or by changing the reservation after the volume has been created.
+A "sparse volume" is a volume where the reservation is less then the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation.
.RE
.sp
.ne 2
.mk
.na
-\fBcanmount=\fBon\fR | \fBoff\fR\fR
+\fBvscan=\fBon\fR|\fBoff\fR\fR
.ad
.sp .6
.RS 4n
-If this property is set to "\fBoff\fR", the file system cannot be mounted, and is ignored by "\fBzfs mount -a\fR". This is similar to setting the "mountpoint" property to "\fBnone\fR", except
-that the dataset still has a normal "mountpoint" property which can be inherited. This allows datasets to be used solely as a mechanism to inherit properties. One use case is to have two logically separate datasets have the same mountpoint, so that the children of both datasets appear
-in the same directory, but may have different inherited characteristics. The default value is "\fBon\fR".
-.sp
-This property is not inherited.
+Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is "off".
.RE
.sp
@@ -670,31 +829,74 @@ Controls whether extended attributes are enabled for this file system. The defau
.ne 2
.mk
.na
-\fBcopies=\fB1\fR | \fB2\fR | \fB3\fR\fR
+\fBzoned=\fIon\fR | \fIoff\fR\fR
.ad
.sp .6
.RS 4n
-Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or raid-z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated
-file and dataset, changing the "used" property and counting against quotas and reservations.
+Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is "off".
+.RE
+
.sp
-Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the "\fB-o\fR copies=" option.
+.LP
+The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the "\fBzfs create\fR" command, these properties are inherited from the parent dataset.
+If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties.
+.sp
+.ne 2
+.mk
+.na
+\fBcasesensitivity = \fBsensitive\fR | \fBinsensitive\fR | \fBmixed\fR\fR
+.ad
+.sp .6
+.RS 4n
+Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the "\fBcasesensitivity\fR" property is "\fBsensitive\fR."
+Traditionally, UNIX and POSIX file systems have case-sensitive file names.
+.sp
+The "\fBmixed\fR" value for the "\fBcasesensitivity\fR" property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system
+that supports mixed behavior is limited to the Solaris CIFS server product. For more information about the "mixed" value behavior, see the \fIZFS Administration Guide\fR.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBnormalization =\fBnone\fR | \fBformD\fR | \fBformKCf\fR\fR
+.ad
+.sp .6
+.RS 4n
+Indicates whether the file system should perform a \fBunicode\fR normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified; names are normalized as part of any comparison
+process. If this property is set to a legal value other than "\fBnone\fR," and the "\fButf8only\fR" property was left unspecified, the "\fButf8only\fR" property is automatically set to "\fBon\fR."
+The default value of the "\fBnormalization\fR" property is "\fBnone\fR." This property cannot be changed after the file system is created.
.RE
.sp
.ne 2
.mk
.na
-\fBjailed=\fIon\fR | \fIoff\fR\fR
+\fBjailed =\fIon\fR | \fIoff\fR\fR
.ad
.sp .6
.RS 4n
Controls whether the dataset is managed from within a jail. The default value is "off".
.RE
-.SS "iscsioptions"
+.sp
+.ne 2
+.mk
+.na
+\fButf8only =\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Indicates whether the file system should reject file names that include characters that are not present in the \fBUTF-8\fR character code set. If this property is explicitly set to "\fBoff\fR," the normalization property must either not be
+explicitly set or be set to "\fBnone\fR." The default value for the "\fButf8only\fR" property is "off." This property cannot be changed after the file system is created.
+.RE
+
+.sp
.LP
-This read-only property, which is hidden, is used by the \fBiSCSI\fR target daemon to store persistent information, such as the \fBIQN\fR. It cannot be viewed or modified using the \fBzfs\fR command. The contents are not intended for external consumers.
+The "\fBcasesensitivity\fR," "\fBnormalization\fR," and "\fButf8only\fR" properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration
+feature.
.SS "Temporary Mount Point Properties"
+.sp
.LP
When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the "\fBzfs mount\fR" command for normal file systems,
its mount options are set according to its properties. The correlation between properties and mount options is as follows:
@@ -711,30 +913,39 @@ its mount options are set according to its properties. The correlation between p
.in -2
.sp
+.sp
.LP
In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for "nodevices,nosetuid".
These properties are reported as "temporary" by the "\fBzfs get\fR" command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings.
.SS "User Properties"
+.sp
.LP
In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets.
+.sp
.LP
User property names must contain a colon (":") character, to distinguish them from native properties. They might contain lowercase letters, numbers, and the following punctuation characters: colon (":"), dash ("-"), period ("."), and underscore
("_"). The expected convention is that the property name is divided into two portions such as "\fImodule\fR:\fIproperty\fR", but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters,
and cannot begin with a dash ("-").
+.sp
.LP
When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for
different purposes. Property names beginning with "com.sun." are reserved for use by Sun Microsystems.
+.sp
.LP
The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties ("zfs list", "zfs get", "zfs set", etc.) can be used to manipulate both native properties and user properties.
Use the "\fBzfs inherit\fR" command to clear a user property . If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters.
.SS "Volumes as Swap or Dump Devices"
+.sp
.LP
To set up a swap area, create a \fBZFS\fR volume of a specific size and then enable swap on that device. For more information, see the EXAMPLES section.
+.sp
.LP
Do not swap to a file on a \fBZFS\fR file system. A \fBZFS\fR swap file configuration is not supported.
+.sp
.LP
Using a \fBZFS\fR volume as a dump device is not supported.
.SH SUBCOMMANDS
+.sp
.LP
All subcommands that modify state are logged persistently to the pool in their original form.
.sp
@@ -752,7 +963,7 @@ Displays a help message.
.ne 2
.mk
.na
-\fB\fBzfs create\fR [[\fB-o\fR property=value]...] \fIfilesystem\fR\fR
+\fB\fBzfs create\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR\fR
.ad
.sp .6
.RS 4n
@@ -761,7 +972,19 @@ Creates a new \fBZFS\fR file system. The file system is automatically mounted ac
.ne 2
.mk
.na
-\fB\fB-o\fR property=value\fR
+\fB\fB-p\fR\fR
+.ad
+.RS 21n
+.rt
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If
+the target filesystem already exists, the operation completes successfully.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR
.ad
.RS 21n
.rt
@@ -775,7 +998,7 @@ error results if the same property is specified in multiple \fB-o\fR options.
.ne 2
.mk
.na
-\fB\fBzfs create\fR [\fB-s\fR] [\fB-b\fR \fIblocksize\fR] [[\fB-o\fR property=value]...] \fB-V\fR \fIsize\fR \fIvolume\fR\fR
+\fB\fBzfs create\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR\fR
.ad
.sp .6
.RS 4n
@@ -787,6 +1010,18 @@ the logical size as exported by the device. By default, a reservation of equal s
.ne 2
.mk
.na
+\fB\fB-p\fR\fR
+.ad
+.RS 21n
+.rt
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If
+the target filesystem already exists, the operation completes successfully.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fB-s\fR\fR
.ad
.RS 21n
@@ -798,7 +1033,7 @@ Creates a sparse volume with no reservation. See "volsize" in the Native Propert
.ne 2
.mk
.na
-\fB\fB-o\fR property=value\fR
+\fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR
.ad
.RS 21n
.rt
@@ -837,7 +1072,7 @@ Destroys the given dataset. By default, the command unshares any file systems th
.ad
.RS 6n
.rt
-Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
+Recursively destroy all children. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
.RE
.sp
@@ -848,7 +1083,7 @@ Recursively destroy all children. If a snapshot is specified, destroy all snapsh
.ad
.RS 6n
.rt
-Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendant file systems.
+Recursively destroy all dependents, including cloned file systems outside the target hierarchy. If a snapshot is specified, destroy all snapshots with this name in descendent file systems.
.RE
.sp
@@ -869,58 +1104,65 @@ Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR o
.ne 2
.mk
.na
-\fB\fBzfs clone\fR \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR
+\fB\fBzfs snapshot\fR [\fB-r\fR] \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR
.ad
.sp .6
.RS 4n
-Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original.
-.RE
-
+Creates a snapshot with the given name. See the "Snapshots" section for details.
.sp
.ne 2
.mk
.na
-\fB\fBzfs promote\fR \fIfilesystem\fR\fR
+\fB\fB-r\fR\fR
.ad
-.sp .6
-.RS 4n
-Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the "origin" file system
-becomes a clone of the specified file system.
-.sp
-The snaphot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the "origin" file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed
-by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The "\fBrename\fR" subcommand can be used to rename any conflicting snapshots.
+.RS 6n
+.rt
+Recursively create snapshots of all descendent datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time.
+.RE
+
.RE
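+.sp
+For example (the names are illustrative), an atomic, recursive snapshot of a home directory hierarchy could be taken with:
+.sp
+.in +2
+.nf
+# \fBzfs snapshot -r pool/home@friday\fR  # hypothetical dataset and snapshot names
+.fi
+.in -2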
.sp
.ne 2
.mk
.na
-\fB\fBzfs rename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+\fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR
.ad
.sp .6
.RS 4n
-Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does
-not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point.
+Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than
+the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt
+Recursively destroy any snapshots more recent than the one specified.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs snapshot\fR [\fB-r\fR] \fIfilesystem@name\fR|\fIvolume@name\fR\fR
+\fB\fB-R\fR\fR
.ad
-.sp .6
-.RS 4n
-Creates a snapshot with the given name. See the "Snapshots" section for details.
+.RS 6n
+.rt
+Recursively destroy any more recent snapshots, as well as any clones of those snapshots.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-r\fR\fR
+\fB\fB-f\fR\fR
.ad
.RS 6n
.rt
-Recursively create snapshots of all descendant datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time.
+Used with the \fB-R\fR option to force an unmount of any clone file systems that are to be destroyed.
.RE
.RE
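+.sp
+For example (the names are illustrative), the following rolls a file system back to an older snapshot, destroying any more recent snapshots along the way:
+.sp
+.in +2
+.nf
+# \fBzfs rollback -r pool/home@monday\fR  # hypothetical dataset and snapshot names
+.fi
+.in -2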
@@ -929,43 +1171,67 @@ Recursively create snapshots of all descendant datasets. Snapshots are taken ato
.ne 2
.mk
.na
-\fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR
+\fB\fBzfs clone\fR [\fB-p\fR] \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR
.ad
.sp .6
.RS 4n
-Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than
-the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option. The file system is unmounted and remounted, if necessary.
+Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original.
.sp
.ne 2
.mk
.na
-\fB\fB-r\fR\fR
+\fB\fB-p\fR\fR
.ad
.RS 6n
.rt
-Recursively destroy any snapshots more recent than the one specified.
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully.
+.RE
+
.RE
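+.sp
+For example (the names are illustrative), a writable clone of a snapshot could be created, along with any missing parent datasets, with:
+.sp
+.in +2
+.nf
+# \fBzfs clone -p pool/home@friday pool/clones/work\fR  # hypothetical names
+.fi
+.in -2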
.sp
.ne 2
.mk
.na
-\fB\fB-R\fR\fR
+\fB\fBzfs promote\fR \fIclone-filesystem\fR\fR
.ad
-.RS 6n
-.rt
-Recursively destroy any more recent snapshots, as well as any clones of those snapshots.
+.sp .6
+.RS 4n
+Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the "origin" file system
+becomes a clone of the specified file system.
+.sp
+The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the "origin" file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed
+by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The "\fBrename\fR" subcommand can be used to rename any conflicting snapshots.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fBzfs rename\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+.ad
+.br
+.na
+\fB\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR
+.ad
+.br
+.na
+\fB\fBzfs
+rename\fR [\fB-p\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does
+not need to be specified as part of the second argument. Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-p\fR\fR
.ad
.RS 6n
.rt
-Force an unmount of any file systems using the "\fBunmount -f\fR" command.
+Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the "mountpoint" property inherited from their parent.
.RE
.RE
@@ -974,7 +1240,22 @@ Force an unmount of any file systems using the "\fBunmount -f\fR" command.
.ne 2
.mk
.na
-\fB\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIprop\fR[,\fIprop\fR] ]... [ \fB-t\fR \fItype\fR[,\fItype\fR]...] [ \fB-s\fR \fIprop\fR [\fB-s\fR \fIprop\fR]... [ \fB-S\fR \fIprop\fR [\fB-S\fR \fIprop\fR]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fI/pathname\fR|.\fI/pathname\fR ...\fR
+\fB\fBzfs rename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Recursively rename the snapshots of all descendent datasets. Snapshots are the only type of dataset that can be renamed recursively.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs\fR \fBlist\fR [\fB-rH\fR] [\fB-o\fR \fIproperty\fR[,\fI\&...\fR]] [ \fB-t\fR \fItype\fR[,\fI\&...\fR]]\fR
+.ad
+.br
+.na
+\fB[ \fB-s\fR \fIproperty\fR ] ... [ \fB-S\fR \fIproperty\fR ] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR] ...\fR
.ad
.sp .6
.RS 4n
@@ -993,7 +1274,7 @@ name,used,available,referenced,mountpoint
.na
\fB\fB-H\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary whitespace.
.RE
@@ -1004,7 +1285,7 @@ Used for scripting mode. Do not print headers and separate fields by a single ta
.na
\fB\fB-r\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
Recursively display any children of the dataset on the command line.
.RE
@@ -1013,9 +1294,9 @@ Recursively display any children of the dataset on the command line.
.ne 2
.mk
.na
-\fB\fB-o\fR \fIprop\fR\fR
+\fB\fB-o\fR \fIproperty\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
A comma-separated list of properties to display. The property must be one of the properties described in the "Native Properties" section, or the special value "name" to display the dataset name.
.RE
@@ -1024,12 +1305,12 @@ A comma-separated list of properties to display. The property must be one of the
.ne 2
.mk
.na
-\fB\fB-s\fR \fIprop\fR\fR
+\fB\fB-s\fR \fIproperty\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
A property to use for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value "name" to sort by the dataset name. Multiple
-properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
+properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance.
.sp
The following is a list of sorting criteria:
.RS +4
@@ -1062,9 +1343,9 @@ If no sorting options are specified the existing behavior of "\fBzfs list\fR" is
.ne 2
.mk
.na
-\fB\fB-S\fR \fIprop\fR\fR
+\fB\fB-S\fR \fIproperty\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
Same as the \fB-s\fR option, but sorts by property in descending order.
.RE
@@ -1075,7 +1356,7 @@ Same as the \fB-s\fR option, but sorts by property in descending order.
.na
\fB\fB-t\fR \fItype\fR\fR
.ad
-.RS 11n
+.RS 15n
.rt
A comma-separated list of types to display, where "type" is one of "filesystem", "snapshot" or "volume". For example, specifying "\fB-t snapshot\fR" displays only snapshots.
.RE
@@ -1098,7 +1379,7 @@ form with a suffix of "B", "K", "M", "G", "T", "P", "E", "Z" (for bytes, Kbytes,
.ne 2
.mk
.na
-\fB\fBzfs get\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fIall\fR | \fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
+\fB\fBzfs get\fR [\fB-rHp\fR] [\fB-o\fR \fIfield\fR[,...] [\fB-s\fR \fIsource\fR[,...] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ...\fR
.ad
.sp .6
.RS 4n
@@ -1201,42 +1482,70 @@ Recursively inherit the given property for all children.
.ne 2
.mk
.na
-\fB\fBzfs mount\fR\fR
+\fB\fBzfs upgrade\fR [\fB-v\fR]\fR
.ad
.sp .6
.RS 4n
-Displays all \fBZFS\fR file systems currently mounted.
+Displays a list of file systems that are not the most recent version.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs mount\fR[\fB-o\fR \fIopts\fR] [\fB-O\fR] \fB-a\fR\fR
+\fB\fBzfs upgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] [\fB-a\fR | \fIfilesystem\fR]\fR
.ad
.sp .6
.RS 4n
-Mounts all available \fBZFS\fR file systems. Invoked automatically as part of the boot process.
+Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. "\fBzfs send\fR" streams generated from new snapshots of these file systems cannot be accessed
+on systems running older versions of the software.
+.sp
+The file system version is independent of the pool version (see \fBzpool\fR(1M) for information on the "\fBzpool upgrade\fR" command).
+.sp
+The file system version does not have to be upgraded when the pool version is upgraded, and vice versa.
.sp
.ne 2
.mk
.na
-\fB\fB-o\fR \fIopts\fR\fR
+\fB\fB-a\fR\fR
.ad
-.RS 11n
+.RS 14n
.rt
-An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
+Upgrade all file systems on all imported pools.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-O\fR\fR
+\fB\fIfilesystem\fR\fR
.ad
-.RS 11n
+.RS 14n
.rt
-Perform an overlay mount. See \fBmount\fR(1M) for more information.
+Upgrade the specified file system.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrade the specified file system and all descendent file systems.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-V\fR \fIversion\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrade to the specified \fIversion\fR. If the \fB-V\fR flag is not specified, this command upgrades to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this
+software.
.RE
.RE
@@ -1245,18 +1554,29 @@ Perform an overlay mount. See \fBmount\fR(1M) for more information.
.ne 2
.mk
.na
-\fB\fBzfs mount\fR [\fB-o\fR \fIopts\fR] [\fB-O\fR] \fIfilesystem\fR\fR
+\fB\fBzfs mount\fR\fR
.ad
.sp .6
.RS 4n
-Mounts a specific \fBZFS\fR file system. This is typically not necessary, as file systems are automatically mounted when they are created or the mountpoint property has changed. See the "Mount Points" section for details.
+Displays all \fBZFS\fR file systems currently mounted.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-o\fR \fIopts\fR\fR
+\fB\fBzfs mount\fR [\fB-vO\fR] [\fB-o\fR \fIoptions\fR] \fB-a\fR | \fIfilesystem\fR\fR
.ad
-.RS 11n
+.sp .6
+.RS 4n
+Mounts \fBZFS\fR file systems. Invoked automatically as part of the boot process.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIoptions\fR\fR
+.ad
+.RS 14n
.rt
An optional comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details.
.RE
@@ -1267,97 +1587,152 @@ An optional comma-separated list of mount options to use temporarily for the dur
.na
\fB\fB-O\fR\fR
.ad
-.RS 11n
+.RS 14n
.rt
Perform an overlay mount. See \fBmount\fR(1M) for more information.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.RS 14n
+.rt
+Report mount progress.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs unmount\fR \fB-a\fR\fR
+\fB\fB-a\fR\fR
.ad
-.sp .6
-.RS 4n
-Unmounts all currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
+.RS 14n
+.rt
+Mount all available \fBZFS\fR file systems. Invoked automatically as part of the boot process.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs unmount\fR [\fB-f\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
+\fB\fIfilesystem\fR\fR
+.ad
+.RS 14n
+.rt
+Mount the specified filesystem.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unmount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR
.ad
.sp .6
.RS 4n
-Unmounts the given file system. The command can also be given a path to a \fBZFS\fR file system mount point on the system.
+Unmounts currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
.sp
.ne 2
.mk
.na
\fB\fB-f\fR\fR
.ad
-.RS 6n
+.RS 25n
.rt
Forcefully unmount the file system, even if it is currently in use.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-a\fR\fR
+.ad
+.RS 25n
+.rt
+Unmount all available \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs share\fR \fB-a\fR\fR
+\fB\fIfilesystem\fR|\fImountpoint\fR\fR
.ad
-.sp .6
-.RS 4n
-Shares all available \fBZFS\fR file systems. This is invoked automatically as part of the boot process.
+.RS 25n
+.rt
+Unmount the specified filesystem. The command can also be given a path to a \fBZFS\fR file system mount point on the system.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs share\fR \fIfilesystem\fR\fR
+\fB\fBzfs share\fR \fB-a\fR | \fIfilesystem\fR\fR
.ad
.sp .6
.RS 4n
-Shares a specific \fBZFS\fR file system according to the "sharenfs" property. File systems are shared when the "sharenfs" property is set.
+Shares available \fBZFS\fR file systems.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-a\fR\fR
+.ad
+.RS 14n
+.rt
+Share all available \fBZFS\fR file systems. Invoked automatically as part of the boot process.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs unshare\fR \fB-a\fR\fR
+\fB\fIfilesystem\fR\fR
.ad
-.sp .6
-.RS 4n
-Unshares all currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process.
+.RS 14n
+.rt
+Share the specified filesystem according to the "sharenfs" and "sharesmb" properties. File systems are shared when the "sharenfs" or "sharesmb" property is set.
+.RE
+
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzfs unshare\fR [\fB-F\fR] \fIfilesystem\fR|\fImountpoint\fR\fR
+\fB\fBzfs unshare\fR \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR
.ad
.sp .6
.RS 4n
-Unshares the given file system. The command can also be given a path to a \fBZFS\fR file system shared on the system.
+Unshares currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process.
.sp
.ne 2
.mk
.na
-\fB\fB-F\fR\fR
+\fB\fB-a\fR\fR
.ad
-.RS 6n
+.RS 25n
.rt
-Forcefully unshare the file system, even if it is currently in use.
+Unshare all available \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fIfilesystem\fR|\fImountpoint\fR\fR
+.ad
+.RS 25n
+.rt
+Unshare the specified filesystem. The command can also be given a path to a \fBZFS\fR file system shared on the system.
.RE
.RE
@@ -1366,27 +1741,64 @@ Forcefully unshare the file system, even if it is currently in use.
.ne 2
.mk
.na
-\fB\fBzfs send\fR [\fB-i\fR \fIsnapshot1\fR] \fIsnapshot2\fR\fR
+\fB\fBzfs send\fR [\fB-vR\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR\fR
.ad
.sp .6
.RS 4n
-Creates a stream representation of snapshot2, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1). By default, a full stream is generated.
+Creates a stream representation of the second \fIsnapshot\fR, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1)). By default, a full stream is generated.
.sp
.ne 2
.mk
.na
-\fB\fB-i\fR \fIsnapshot1\fR\fR
+\fB\fB-i\fR \fIsnapshot\fR\fR
.ad
-.RS 16n
+.RS 15n
.rt
-Generate an incremental stream from \fIsnapshot1\fR to \fIsnapshot2\fR. The incremental source \fIsnapshot1\fR can be specified as the last component of the snapshot name (for example, the part after the "@"),
-and it is assumed to be from the same file system as \fIsnapshot2\fR.
+Generate an incremental stream from the first \fIsnapshot\fR to the second \fIsnapshot\fR. The incremental source (the first \fIsnapshot\fR) can be specified as the last component of the snapshot name (for example,
+the part after the "@"), and it is assumed to be from the same file system as the second \fIsnapshot\fR.
+.sp
+If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, "pool/fs@origin", not just "@origin").
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-I\fR \fIsnapshot\fR\fR
+.ad
+.RS 15n
+.rt
+Generate a stream package that sends all intermediary snapshots from the first snapshot to the second snapshot. For example, "\fB-I @a fs@d\fR" is similar to "\fB-i @a fs@b; -i @b fs@c; -i @c fs@d\fR". The incremental source snapshot
+may be specified as with the \fB-i\fR option.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR\fR
+.ad
+.RS 15n
+.rt
+Generate a replication stream package, which will replicate the specified filesystem, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved.
+.sp
+If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the \fB-F\fR flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-v\fR\fR
+.ad
+.RS 15n
+.rt
+Print verbose information about the stream package generated.
.RE
-.LP
The format of the stream is evolving. No backwards compatibility is guaranteed. You may not be able to receive your streams on future versions of \fBZFS\fR.
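+.sp
+As an illustrative sketch only (the pool, file system, snapshot, and host names are hypothetical), an incremental replication stream covering all intermediate snapshots might be generated and sent to another system as follows:
+.sp
+.in +2
+.nf
+\fB# zfs send -R -I @a tank/fs@d | ssh host zfs receive -d -F poolB/backup\fR
+.fi
+.in -2
+.sp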
+.RE
+
.sp
.ne 2
.mk
@@ -1402,7 +1814,8 @@ The format of the stream is evolving. No backwards compatibility is guaranteed.
Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the "\fBzfs send\fR" subcommand, which by default creates a full
stream. "\fBzfs recv\fR" can be used as an alias for "\fBzfs receive\fR".
.sp
-If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. The destination file system and all of its child file systems are unmounted and cannot be accessed during the receive operation.
+If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and re-created, which means the \fBzvol\fR cannot
+be accessed during the \fBreceive\fR operation.
.sp
The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the \fB-d\fR option.
.sp
@@ -1438,7 +1851,7 @@ Print verbose information about the stream and the time required to perform the
.ad
.RS 6n
.rt
-Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to determine what name the receive operation would use.
+Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use.
.RE
.sp
@@ -1449,7 +1862,8 @@ Do not actually receive the stream. This can be useful in conjunction with the \
.ad
.RS 6n
.rt
-Force a rollback of the \fIfilesystem\fR to the most recent snapshot before performing the receive operation.
+Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by "\fBzfs send -R -[iI]\fR"), destroy snapshots and file systems that do
+not exist on the sending side.
.RE
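+.sp
+As an illustrative sketch (the dataset and stream file names are hypothetical), a saved incremental stream might first be checked with a dry run and then received with a forced rollback, as follows:
+.sp
+.in +2
+.nf
+\fB# zfs receive -nv poolB/backup/fs < /backup/fs-incr.stream
+# zfs receive -F poolB/backup/fs < /backup/fs-incr.stream\fR
+.fi
+.in -2
+.sp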
.RE
@@ -1458,6 +1872,186 @@ Force a rollback of the \fIfilesystem\fR to the most recent snapshot before perf
.ne 2
.mk
.na
+\fB\fBzfs allow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.br
+.na
+\fB\fBzfs allow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Delegates \fBZFS\fR administration permission for the file systems to non-privileged users.
+.sp
+.ne 2
+.mk
+.na
+\fB[\fB-ug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...]\fR
+.ad
+.sp .6
+.RS 4n
+Specifies to whom the permissions are delegated. Multiple entities can be specified as a comma-separated list. If neither of the \fB-ug\fR options is specified, then the argument is interpreted preferentially as the keyword "everyone", then as a user name,
+and lastly as a group name. To specify a user or group named "everyone", use the \fB-u\fR or \fB-g\fR options. To specify a group with the same name as a user, use the \fB-g\fR option.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB[\fB-e\fR] \fIperm\fR|@\fIsetname\fR[,...]\fR
+.ad
+.sp .6
+.RS 4n
+Specifies that the permissions be delegated to "everyone." Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which
+begin with an "at sign" ("@") , may be specified. See the \fB-s\fR form below for details.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB[\fB-ld\fR] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies where the permissions are delegated. If neither of the \fB-ld\fR options is specified, or both are, then the permissions are allowed for the file system or volume, and all of its descendents. If only the \fB-l\fR option is used, then the permissions are allowed "locally"
+only for the specified file system. If only the \fB-d\fR option is used, then the permissions are allowed only for the descendent file systems (see the example below).
+.RE
+
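+.sp
+As an illustrative sketch (the user, permission, and dataset names are hypothetical), permissions might be delegated locally only, or delegated to everyone, as follows:
+.sp
+.in +2
+.nf
+\fB# zfs allow -l cindys snapshot tank/home
+# zfs allow -e mount,share tank/public\fR
+.fi
+.in -2
+.sp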
+.RE
+
+.sp
+.LP
+Permissions are generally the ability to use a \fBZFS\fR subcommand or change a \fBZFS\fR property. The following permissions are available:
+.sp
+.in +2
+.nf
+NAME TYPE NOTES
+allow subcommand Must also have the permission
+ that is being allowed.
+clone subcommand Must also have the 'create' ability
+ and the 'mount' ability in the origin
+ file system.
+create subcommand Must also have the 'mount' ability.
+destroy subcommand Must also have the 'mount' ability.
+mount subcommand Allows mount, unmount, and
+ create/remove zvol device links.
+promote subcommand Must also have the 'mount' ability and
+ 'promote' ability in the origin file system.
+receive subcommand Must also have the 'mount' ability and
+ the 'create' ability.
+rename subcommand Must also have the 'mount' ability and
+ the 'create' ability in the new parent.
+rollback subcommand Must also have the 'mount' ability.
+snapshot subcommand Must also have the 'mount' ability.
+share subcommand Allows share and unshare.
+send subcommand
+
+
+aclinherit property
+aclmode property
+atime property
+canmount property
+checksum property
+compression property
+copies property
+devices property
+exec property
+mountpoint property
+quota property
+readonly property
+recordsize property
+reservation property
+setuid property
+shareiscsi property
+sharenfs property
+snapdir property
+version property
+volsize property
+xattr property
+zoned property
+userprop other Allows changing any user property.
+.fi
+.in -2
+.sp
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs allow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets "create time" permissions. These permissions are granted (locally) to the creator of any newly-created descendent file system.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs allow\fR \fB-s\fR @setname \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Defines or adds permissions to a permission set. The set can be used by other \fBzfs allow\fR commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same
+naming restrictions as ZFS file systems, but the name must begin with an "at sign" ("@"), and can be no more than 64 characters long.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[, ...]] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.br
+.na
+\fB\fBzfs unallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR [,...]] \fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.br
+.na
+\fB\fBzfs unallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[,...]]\fR
+.ad
+.br
+.na
+\fB\fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Removes permissions that were granted with the "\fBzfs allow\fR" command. No permissions are explicitly denied, so other permissions granted are still in effect (for example, if the permission is granted by an ancestor). If no permissions are specified,
+then all permissions for the specified \fIuser\fR, \fIgroup\fR, or \fIeveryone\fR are removed. Specifying "everyone" (or using the \fB-e\fR option) only removes the permissions that were granted to "everyone",
+not all permissions for every user and group. See the "\fBzfs allow\fR" command for a description of the \fB-ldugec\fR options.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 6n
+.rt
+Recursively remove the permissions from this file system and all descendents.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @setname [\fIperm\fR|@\fIsetname\fR[,...]]\fR
+.ad
+.br
+.na
+\fB\fIfilesystem\fR|\fIvolume\fR\fR
+.ad
+.sp .6
+.RS 4n
+Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, and the set itself is removed entirely.
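+.sp
+For illustration only (the set and dataset names are hypothetical), the following removes the "snapshot" permission from a set, and then removes the set entirely:
+.sp
+.in +2
+.nf
+\fB# zfs unallow -s @pset snapshot tank/users
+# zfs unallow -s @pset tank/users\fR
+.fi
+.in -2
+.sp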
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzfs jail\fR \fIjailid\fR \fIfilesystem\fR\fR
.ad
.sp .6
@@ -1480,6 +2074,7 @@ Detaches the given file system from the given jail.
.SH EXAMPLES
.LP
\fBExample 1 \fRCreating a ZFS File System Hierarchy
+.sp
.LP
The following commands create a file system named "\fBpool/home\fR" and a file system named "\fBpool/home/bob\fR". The mount point "\fB/export/home\fR" is set for the parent file system, and automatically inherited
by the child file system.
@@ -1496,6 +2091,7 @@ by the child file system.
.LP
\fBExample 2 \fRCreating a ZFS Snapshot
+.sp
.LP
The following command creates a snapshot named "yesterday". This snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of the "\fBpool/home/bob\fR" file system.
@@ -1509,8 +2105,9 @@ The following command creates a snapshot named "yesterday". This snapshot is mou
.LP
\fBExample 3 \fRTaking and destroying multiple snapshots
+.sp
.LP
-The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home\fR" and all of its descendant file systems. Each snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of its file system. The
+The following command creates snapshots named "\fByesterday\fR" of "\fBpool/home\fR" and all of its descendent file systems. Each snapshot is mounted on demand in the ".zfs/snapshot" directory at the root of its file system. The
second command destroys the newly created snapshots.
.sp
@@ -1524,6 +2121,7 @@ second command destroys the newly created snapshots.
.LP
\fBExample 4 \fRTurning Off Compression
+.sp
.LP
The following commands turn compression off for all file systems under "\fBpool/home\fR", but explicitly turns it on for "\fBpool/home/anne\fR".
@@ -1538,6 +2136,7 @@ The following commands turn compression off for all file systems under "\fBpool/
.LP
\fBExample 5 \fRListing ZFS Datasets
+.sp
.LP
The following command lists all active file systems and volumes in the system.
@@ -1548,17 +2147,18 @@ The following command lists all active file systems and volumes in the system.
NAME USED AVAIL REFER MOUNTPOINT
- pool 100G 60G - /pool
- pool/home 100G 60G - /export/home
- pool/home/bob 40G 60G 40G /export/home/bob
- pool/home/bob@yesterday 3M - 40G -
- pool/home/anne 60G 60G 40G /export/home/anne
+ pool 450K 457G 18K /pool
+ pool/home 315K 457G 21K /export/home
+ pool/home/anne 18K 457G 18K /export/home/anne
+ pool/home/bob 276K 457G 276K /export/home/bob
+ pool/home/bob@yesterday 0 - 276K -
.fi
.in -2
.sp
.LP
\fBExample 6 \fRSetting a Quota on a ZFS File System
+.sp
.LP
The following command sets a quota of 50 gbytes for "\fBpool/home/bob\fR".
@@ -1572,6 +2172,7 @@ The following command sets a quota of 50 gbytes for "\fBpool/home/bob\fR".
.LP
\fBExample 7 \fRListing ZFS Properties
+.sp
.LP
The following command lists all properties for "\fBpool/home/bob\fR".
@@ -1583,18 +2184,17 @@ The following command lists all properties for "\fBpool/home/bob\fR".
NAME PROPERTY VALUE SOURCE
pool/home/bob type filesystem -
- pool/home/bob creation Fri Feb 23 14:20 2007 -
- pool/home/bob used 24.5K -
+ pool/home/bob creation Thu Jul 12 14:44 2007 -
+ pool/home/bob used 276K -
pool/home/bob available 50.0G -
- pool/home/bob referenced 24.5K -
+ pool/home/bob referenced 276K -
pool/home/bob compressratio 1.00x -
pool/home/bob mounted yes -
pool/home/bob quota 50G local
pool/home/bob reservation none default
pool/home/bob recordsize 128K default
- pool/home/bob mountpoint /pool/home/bob default
- pool/home/bob sharenfs off default
- pool/home/bob shareiscsi off default
+ pool/home/bob mountpoint /export/home/bob inherited from
+ pool/home
pool/home/bob checksum on default
pool/home/bob compression off default
pool/home/bob atime on default
@@ -1605,15 +2205,24 @@ The following command lists all properties for "\fBpool/home/bob\fR".
pool/home/bob zoned off default
pool/home/bob snapdir hidden default
pool/home/bob aclmode groupmask default
- pool/home/bob aclinherit secure default
+ pool/home/bob aclinherit restricted default
pool/home/bob canmount on default
+ pool/home/bob nbmand off default
+ pool/home/bob shareiscsi off default
+ pool/home/bob sharesmb off default
+ pool/home/bob sharenfs off default
pool/home/bob xattr on default
+ pool/home/bob refquota 10M local
+ pool/home/bob refreservation none default
+ pool/home/bob copies 1 default
+ pool/home/bob version 1 -
.fi
.in -2
.sp
+.sp
.LP
The following command gets a single property value.
@@ -1626,6 +2235,7 @@ on
.in -2
.sp
+.sp
.LP
The following command lists all properties with local settings for "\fBpool/home/bob\fR".
@@ -1643,6 +2253,7 @@ The following command lists all properties with local settings for "\fBpool/home
.LP
\fBExample 8 \fRRolling Back a ZFS File System
+.sp
.LP
The following command reverts the contents of "\fBpool/home/anne\fR" to the snapshot named "\fByesterday\fR", deleting all intermediate snapshots.
@@ -1656,6 +2267,7 @@ The following command reverts the contents of "\fBpool/home/anne\fR" to the snap
.LP
\fBExample 9 \fRCreating a ZFS Clone
+.sp
.LP
The following command creates a writable file system whose initial contents are the same as "\fBpool/home/bob@yesterday\fR".
@@ -1669,6 +2281,7 @@ The following command creates a writable file system whose initial contents are
.LP
\fBExample 10 \fRPromoting a ZFS Clone
+.sp
.LP
The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming:
@@ -1692,6 +2305,7 @@ The following commands illustrate how to test out changes to a file system, and
.LP
\fBExample 11 \fRInheriting ZFS Properties
+.sp
.LP
The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to inherit the "checksum" property from their parent.
@@ -1705,6 +2319,7 @@ The following command causes "\fBpool/home/bob\fR" and "\fBpool/home/anne\fR" to
.LP
\fBExample 12 \fRRemotely Replicating ZFS Data
+.sp
.LP
The following commands send a full stream and then an incremental stream to a remote machine, restoring them into "\fBpoolB/received/fs\fR@a" and "\fBpoolB/received/fs@b\fR", respectively. "\fBpoolB\fR" must contain
the file system "\fBpoolB/received\fR", and must not initially contain "\fBpoolB/received/fs\fR".
@@ -1721,30 +2336,31 @@ the file system "\fBpoolB/received\fR", and must not initially contain "\fBpoolB
.sp
.LP
-\fBExample 13 \fRUsing the zfs receive -d Option
+\fBExample 13 \fRUsing the zfs receive -d Option
+.sp
.LP
The following command sends a full stream of "\fBpoolA/fsA/fsB@snap\fR" to a remote machine, receiving it into "\fBpoolB/received/fsA/fsB@snap\fR". The "\fBfsA/fsB@snap\fR" portion of the received snapshot's name
-is determined from the name of the sent snapshot. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR". If "\fBpoolB/received/fsA\fR" does not exist, it will be created as an empty file system.
+is determined from the name of the sent snapshot. "\fBpoolB\fR" must contain the file system "\fBpoolB/received\fR". If "\fBpoolB/received/fsA\fR" does not exist, it is created as an empty file system.
.sp
.in +2
.nf
\fB# zfs send poolA/fsA/fsB@snap | \e
- ssh host zfs receive -d poolB/received
- \fR
+ ssh host zfs receive -d poolB/received\fR
.fi
.in -2
.sp
.LP
\fBExample 14 \fRCreating a ZFS volume as a Swap Device
+.sp
.LP
The following example shows how to create a 5-Gbyte ZFS volume and then add the volume as a swap device.
.sp
.in +2
.nf
-\fB# zfs create -V 5gb tank/vol
+\fB# zfs create -V 5gb tank/vol
# swap -a /dev/zvol/dsk/tank/vol\fR
.fi
.in -2
@@ -1752,6 +2368,7 @@ The following example shows how to create a 5-Gbyte ZFS volume and then add the
.LP
\fBExample 15 \fRSetting User Properties
+.sp
.LP
The following example sets the user defined "com.example:department" property for a dataset.
@@ -1765,6 +2382,7 @@ The following example sets the user defined "com.example:department" property fo
.LP
\fBExample 16 \fRCreating a ZFS Volume as a iSCSI Target Device
+.sp
.LP
The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target.
@@ -1782,9 +2400,173 @@ Connections: 0
.in -2
.sp
+.sp
.LP
After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see the Solaris Administration Guide: Devices and File Systems.
+.LP
+\fBExample 17 \fRPerforming a Rolling Snapshot
+.sp
+.LP
+The following example shows how to maintain a history of snapshots with a consistent naming scheme. To keep a week's worth of snapshots, the user destroys the oldest snapshot, renames the remaining snapshots, and then creates a new snapshot, as follows:
+
+.sp
+.in +2
+.nf
+\fB# zfs destroy -r pool/users@7daysago
+# zfs rename -r pool/users@6daysago @7daysago
+# zfs rename -r pool/users@5daysago @6daysago
+\&...
+# zfs rename -r pool/users@yesterday @2daysago
+# zfs rename -r pool/users@today @yesterday
+# zfs snapshot -r pool/users@today\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 18 \fRSetting sharenfs Property Options on a ZFS File System
+.sp
+.LP
+The following commands show how to set "sharenfs" property options to enable \fBrw\fR access for a set of \fBIP\fR addresses and to enable root access for system \fBneo\fR on the \fBtank/home\fR file system.
+
+.sp
+.in +2
+.nf
+\fB# zfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR
+
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+If you are using \fBDNS\fR for host name resolution, specify the fully qualified hostname.
+
+.LP
+\fBExample 19 \fRDelegating ZFS Administration Permissions on a ZFS Dataset
+.sp
+.LP
+The following example shows how to set permissions so that user "\fBcindys\fR" can create, destroy, mount and take snapshots on \fBtank/cindys\fR. The permissions on \fBtank/cindys\fR are also displayed.
+
+.sp
+.in +2
+.nf
+\fB# zfs allow cindys create,destroy,mount,snapshot tank/cindys
+# zfs allow tank/cindys\fR
+-------------------------------------------------------------
+Local+Descendent permissions on (tank/cindys)
+ user cindys create,destroy,mount,snapshot
+-------------------------------------------------------------
+
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+Because the \fBtank/cindys\fR mount point permission is set to 755 by default, user \fBcindys\fR will be unable to mount file systems under \fBtank/cindys\fR. Set an \fBACL\fR similar to the following syntax to provide mount point access:
+.sp
+.in +2
+.nf
+# chmod A+user:cindys:add_subdirectory:allow /tank/cindys
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 20 \fRDelegating Create Time Permissions on a ZFS Dataset
+.sp
+.LP
+The following example shows how to grant anyone in the group \fBstaff\fR permission to create file systems in \fBtank/users\fR. This syntax also allows staff members to destroy their own file systems, but not destroy anyone else's file system. The permissions on \fBtank/users\fR are also displayed.
+
+.sp
+.in +2
+.nf
+\fB# zfs allow staff create,mount tank/users
+# zfs allow -c destroy tank/users
+# zfs allow tank/users\fR
+-------------------------------------------------------------
+Create time permissions on (tank/users)
+ create,destroy
+Local+Descendent permissions on (tank/users)
+ group staff create,mount
+-------------------------------------------------------------
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 21 \fRDefining and Granting a Permission Set on a ZFS Dataset
+.sp
+.LP
+The following example shows how to define and grant a permission set on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed.
+
+.sp
+.in +2
+.nf
+\fB# zfs allow -s @pset create,destroy,snapshot,mount tank/users
+# zfs allow staff @pset tank/users
+# zfs allow tank/users
+-------------------------------------------------------------
+Permission sets on (tank/users)
+ @pset create,destroy,mount,snapshot
+Create time permissions on (tank/users)
+ create,destroy
+Local+Descendent permissions on (tank/users)
+ group staff @pset,create,mount
+-------------------------------------------------------------\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 22 \fRDelegating Property Permissions on a ZFS Dataset
+.sp
+.LP
+The following example shows how to grant the ability to set quotas and reservations on the \fBusers/home\fR file system. The permissions on \fBusers/home\fR are also displayed.
+
+.sp
+.in +2
+.nf
+\fB# zfs allow cindys quota,reservation users/home
+# zfs allow users/home\fR
+-------------------------------------------------------------
+Local+Descendent permissions on (users/home)
+ user cindys quota,reservation
+-------------------------------------------------------------
+cindys% zfs set quota=10G users/home/marks
+cindys% zfs get quota users/home/marks
+NAME PROPERTY VALUE SOURCE
+users/home/marks quota 10G local
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 23 \fRRemoving ZFS Delegated Permissions on a ZFS Dataset
+.sp
+.LP
+The following example shows how to remove the snapshot permission from the \fBstaff\fR group on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed.
+
+.sp
+.in +2
+.nf
+\fB# zfs unallow staff snapshot tank/users
+# zfs allow tank/users\fR
+-------------------------------------------------------------
+Permission sets on (tank/users)
+ @pset create,destroy,mount,snapshot
+Create time permissions on (tank/users)
+ create,destroy
+Local+Descendent permissions on (tank/users)
+ group staff @pset,create,mount
+-------------------------------------------------------------
+.fi
+.in -2
+.sp
+
.SH EXIT STATUS
+.sp
.LP
The following exit values are returned:
.sp
@@ -1821,6 +2603,7 @@ Invalid command line options were specified.
.RE
.SH ATTRIBUTES
+.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -1835,9 +2618,13 @@ ATTRIBUTE TYPEATTRIBUTE VALUE
_
AvailabilitySUNWzfsu
_
-Interface StabilityEvolving
+Interface StabilityCommitted
.TE
.SH SEE ALSO
+.sp
+.LP
+\fBgzip\fR(1), \fBssh\fR(1), \fBmount\fR(1M), \fBshare\fR(1M), \fBsharemgr\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBfsync\fR(3c), \fBdfstab\fR(4), \fBattributes\fR(5)
+.sp
.LP
-\fBgzip\fR(1), \fBssh\fR(1), \fBmount\fR(1M), \fBshare\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBfsync\fR(3c), \fBdfstab\fR(4), \fBattributes\fR(5)
+For information about using the \fBZFS\fR web-based management tool and other \fBZFS\fR features, see the \fIZFS Administration Guide\fR.
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
index eb6b8b1..a22370a 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <libintl.h>
#include <libuutil.h>
#include <stddef.h>
@@ -56,28 +54,43 @@ typedef struct zfs_node {
typedef struct callback_data {
uu_avl_t *cb_avl;
- int cb_recurse;
+ int cb_flags;
zfs_type_t cb_types;
zfs_sort_column_t *cb_sortcol;
- zfs_proplist_t **cb_proplist;
+ zprop_list_t **cb_proplist;
} callback_data_t;
uu_avl_pool_t *avl_pool;
/*
- * Called for each dataset. If the object the object is of an appropriate type,
+ * Include snaps if they were requested or if this is a zfs list where types
+ * were not specified and the "listsnapshots" property is set on this pool.
+ */
+static int
+zfs_include_snapshots(zfs_handle_t *zhp, callback_data_t *cb)
+{
+ zpool_handle_t *zph;
+
+ if ((cb->cb_flags & ZFS_ITER_PROP_LISTSNAPS) == 0)
+ return (cb->cb_types & ZFS_TYPE_SNAPSHOT);
+
+ zph = zfs_get_pool_handle(zhp);
+ return (zpool_get_prop_int(zph, ZPOOL_PROP_LISTSNAPS, NULL));
+}
+
+/*
+ * Called for each dataset. If the object is of an appropriate type,
* add it to the avl tree and recurse over any children as necessary.
*/
-int
+static int
zfs_callback(zfs_handle_t *zhp, void *data)
{
callback_data_t *cb = data;
int dontclose = 0;
+ int include_snaps = zfs_include_snapshots(zhp, cb);
- /*
- * If this object is of the appropriate type, add it to the AVL tree.
- */
- if (zfs_get_type(zhp) & cb->cb_types) {
+ if ((zfs_get_type(zhp) & cb->cb_types) ||
+ ((zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) && include_snaps)) {
uu_avl_index_t idx;
zfs_node_t *node = safe_malloc(sizeof (zfs_node_t));
@@ -100,10 +113,12 @@ zfs_callback(zfs_handle_t *zhp, void *data)
/*
* Recurse if necessary.
*/
- if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM ||
- (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types &
- ZFS_TYPE_SNAPSHOT))))
- (void) zfs_iter_children(zhp, zfs_callback, data);
+ if (cb->cb_flags & ZFS_ITER_RECURSE) {
+ if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM)
+ (void) zfs_iter_filesystems(zhp, zfs_callback, data);
+ if ((zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) && include_snaps)
+ (void) zfs_iter_snapshots(zhp, zfs_callback, data);
+ }
if (!dontclose)
zfs_close(zhp);
@@ -118,7 +133,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
zfs_sort_column_t *col;
zfs_prop_t prop;
- if ((prop = zfs_name_to_prop(name)) == ZFS_PROP_INVAL &&
+ if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL &&
!zfs_prop_user(name))
return (-1);
@@ -126,7 +141,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
col->sc_prop = prop;
col->sc_reverse = reverse;
- if (prop == ZFS_PROP_INVAL) {
+ if (prop == ZPROP_INVAL) {
col->sc_user_prop = safe_malloc(strlen(name) + 1);
(void) strcpy(col->sc_user_prop, name);
}
@@ -243,7 +258,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
* Otherwise, we compare 'lnum' and 'rnum'.
*/
lstr = rstr = NULL;
- if (psc->sc_prop == ZFS_PROP_INVAL) {
+ if (psc->sc_prop == ZPROP_INVAL) {
nvlist_t *luser, *ruser;
nvlist_t *lval, *rval;
@@ -257,10 +272,10 @@ zfs_sort(const void *larg, const void *rarg, void *data)
if (lvalid)
verify(nvlist_lookup_string(lval,
- ZFS_PROP_VALUE, &lstr) == 0);
+ ZPROP_VALUE, &lstr) == 0);
if (rvalid)
verify(nvlist_lookup_string(rval,
- ZFS_PROP_VALUE, &rstr) == 0);
+ ZPROP_VALUE, &rstr) == 0);
} else if (zfs_prop_is_string(psc->sc_prop)) {
lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf,
@@ -293,7 +308,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
if (lstr)
ret = strcmp(lstr, rstr);
- if (lnum < rnum)
+ else if (lnum < rnum)
ret = -1;
else if (lnum > rnum)
ret = 1;
@@ -309,9 +324,9 @@ zfs_sort(const void *larg, const void *rarg, void *data)
}
int
-zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
- zfs_sort_column_t *sortcol, zfs_proplist_t **proplist, zfs_iter_f callback,
- void *data, boolean_t args_can_be_paths)
+zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
+ zfs_sort_column_t *sortcol, zprop_list_t **proplist,
+ zfs_iter_f callback, void *data)
{
callback_data_t cb;
int ret = 0;
@@ -328,7 +343,7 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
}
cb.cb_sortcol = sortcol;
- cb.cb_recurse = recurse;
+ cb.cb_flags = flags;
cb.cb_proplist = proplist;
cb.cb_types = types;
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
@@ -341,7 +356,7 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
/*
* If given no arguments, iterate over all datasets.
*/
- cb.cb_recurse = 1;
+ cb.cb_flags |= ZFS_ITER_RECURSE;
ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
} else {
int i;
@@ -354,14 +369,14 @@ zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
* can take volumes as well.
*/
argtype = types;
- if (recurse) {
+ if (flags & ZFS_ITER_RECURSE) {
argtype |= ZFS_TYPE_FILESYSTEM;
if (types & ZFS_TYPE_SNAPSHOT)
argtype |= ZFS_TYPE_VOLUME;
}
for (i = 0; i < argc; i++) {
- if (args_can_be_paths) {
+ if (flags & ZFS_ITER_ARGS_CAN_BE_PATHS) {
zhp = zfs_path_to_zhandle(g_zfs, argv[i],
argtype);
} else {
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
index 1f0ce8e..76a1108 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_iter.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef ZFS_ITER_H
#define ZFS_ITER_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -40,8 +38,12 @@ typedef struct zfs_sort_column {
boolean_t sc_reverse;
} zfs_sort_column_t;
-int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_sort_column_t *,
- zfs_proplist_t **, zfs_iter_f, void *, boolean_t);
+#define ZFS_ITER_RECURSE (1 << 0)
+#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
+#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
+
+int zfs_for_each(int, char **, int options, zfs_type_t,
+ zfs_sort_column_t *, zprop_list_t **, zfs_iter_f, void *);
int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t);
void zfs_free_sort_columns(zfs_sort_column_t *);
diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
index de15b00..6f5f92e 100644
--- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
+++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
@@ -20,18 +20,17 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <libgen.h>
#include <libintl.h>
#include <libuutil.h>
+#include <libnvpair.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
@@ -44,8 +43,10 @@
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/stat.h>
+#include <sys/avl.h>
#include <libzfs.h>
+#include <libuutil.h>
#include "zfs_iter.h"
#include "zfs_util.h"
@@ -53,6 +54,7 @@
libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
+static char history_str[HIS_MAX_RECORD_LEN];
static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
@@ -64,6 +66,7 @@ static int zfs_do_mount(int argc, char **argv);
static int zfs_do_rename(int argc, char **argv);
static int zfs_do_rollback(int argc, char **argv);
static int zfs_do_set(int argc, char **argv);
+static int zfs_do_upgrade(int argc, char **argv);
static int zfs_do_snapshot(int argc, char **argv);
static int zfs_do_unmount(int argc, char **argv);
static int zfs_do_share(int argc, char **argv);
@@ -71,13 +74,16 @@ static int zfs_do_unshare(int argc, char **argv);
static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
static int zfs_do_promote(int argc, char **argv);
+static int zfs_do_allow(int argc, char **argv);
+static int zfs_do_unallow(int argc, char **argv);
static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
/*
- * These libumem hooks provide a reasonable set of defaults for the allocator's
- * debugging facilities.
+ * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
+
+#ifdef DEBUG
const char *
_umem_debug_init(void)
{
@@ -89,6 +95,7 @@ _umem_logging_init(void)
{
return ("fail,contents"); /* $UMEM_LOGGING setting */
}
+#endif
typedef enum {
HELP_CLONE,
@@ -96,6 +103,7 @@ typedef enum {
HELP_DESTROY,
HELP_GET,
HELP_INHERIT,
+ HELP_UPGRADE,
HELP_JAIL,
HELP_UNJAIL,
HELP_LIST,
@@ -109,7 +117,9 @@ typedef enum {
HELP_SHARE,
HELP_SNAPSHOT,
HELP_UNMOUNT,
- HELP_UNSHARE
+ HELP_UNSHARE,
+ HELP_ALLOW,
+ HELP_UNALLOW
} zfs_help_t;
typedef struct zfs_command {
@@ -142,18 +152,20 @@ static zfs_command_t command_table[] = {
{ "set", zfs_do_set, HELP_SET },
{ "get", zfs_do_get, HELP_GET },
{ "inherit", zfs_do_inherit, HELP_INHERIT },
+ { "upgrade", zfs_do_upgrade, HELP_UPGRADE },
{ NULL },
{ "mount", zfs_do_mount, HELP_MOUNT },
- { NULL },
{ "unmount", zfs_do_unmount, HELP_UNMOUNT },
- { NULL },
{ "share", zfs_do_share, HELP_SHARE },
- { NULL },
{ "unshare", zfs_do_unshare, HELP_UNSHARE },
{ NULL },
{ "send", zfs_do_send, HELP_SEND },
{ "receive", zfs_do_receive, HELP_RECEIVE },
{ NULL },
+ { "allow", zfs_do_allow, HELP_ALLOW },
+ { NULL },
+ { "unallow", zfs_do_unallow, HELP_UNALLOW },
+ { NULL },
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
};
@@ -167,39 +179,41 @@ get_usage(zfs_help_t idx)
{
switch (idx) {
case HELP_CLONE:
- return (gettext("\tclone <snapshot> <filesystem|volume>\n"));
+ return (gettext("\tclone [-p] [-o property=value] ... "
+ "<snapshot> <filesystem|volume>\n"));
case HELP_CREATE:
- return (gettext("\tcreate [[-o property=value] ... ] "
+ return (gettext("\tcreate [-p] [-o property=value] ... "
"<filesystem>\n"
- "\tcreate [-s] [-b blocksize] [[-o property=value] ...]\n"
- "\t -V <size> <volume>\n"));
+ "\tcreate [-ps] [-b blocksize] [-o property=value] ... "
+ "-V <size> <volume>\n"));
case HELP_DESTROY:
return (gettext("\tdestroy [-rRf] "
"<filesystem|volume|snapshot>\n"));
case HELP_GET:
- return (gettext("\tget [-rHp] [-o field[,field]...] "
- "[-s source[,source]...]\n"
- "\t <all | property[,property]...> "
+ return (gettext("\tget [-rHp] [-o field[,...]] "
+ "[-s source[,...]]\n"
+ "\t <\"all\" | property[,...]> "
"[filesystem|volume|snapshot] ...\n"));
case HELP_INHERIT:
return (gettext("\tinherit [-r] <property> "
- "<filesystem|volume> ...\n"));
+ "<filesystem|volume|snapshot> ...\n"));
+ case HELP_UPGRADE:
+ return (gettext("\tupgrade [-v]\n"
+ "\tupgrade [-r] [-V version] <-a | filesystem ...>\n"));
case HELP_JAIL:
return (gettext("\tjail <jailid> <filesystem>\n"));
case HELP_UNJAIL:
return (gettext("\tunjail <jailid> <filesystem>\n"));
case HELP_LIST:
- return (gettext("\tlist [-rH] [-o property[,property]...] "
- "[-t type[,type]...]\n"
- "\t [-s property [-s property]...]"
- " [-S property [-S property]...]\n"
- "\t [filesystem|volume|snapshot] ...\n"));
+ return (gettext("\tlist [-rH] [-o property[,...]] "
+ "[-t type[,...]] [-s property] ...\n"
+ "\t [-S property] ... "
+ "[filesystem|volume|snapshot] ...\n"));
case HELP_MOUNT:
return (gettext("\tmount\n"
- "\tmount [-o opts] [-O] -a\n"
- "\tmount [-o opts] [-O] <filesystem>\n"));
+ "\tmount [-vO] [-o opts] <-a | filesystem>\n"));
case HELP_PROMOTE:
- return (gettext("\tpromote <clone filesystem>\n"));
+ return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive [-vnF] <filesystem|volume|"
"snapshot>\n"
@@ -207,26 +221,45 @@ get_usage(zfs_help_t idx)
case HELP_RENAME:
return (gettext("\trename <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
+ "\trename -p <filesystem|volume> <filesystem|volume>\n"
"\trename -r <snapshot> <snapshot>"));
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
- return (gettext("\tsend [-i <snapshot>] <snapshot>\n"));
+ return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
- "<filesystem|volume> ...\n"));
+ "<filesystem|volume|snapshot> ...\n"));
case HELP_SHARE:
- return (gettext("\tshare -a\n"
- "\tshare <filesystem>\n"));
+ return (gettext("\tshare <-a | filesystem>\n"));
case HELP_SNAPSHOT:
- return (gettext("\tsnapshot [-r] "
- "<filesystem@name|volume@name>\n"));
+ return (gettext("\tsnapshot [-r] [-o property=value] ... "
+ "<filesystem@snapname|volume@snapname>\n"));
case HELP_UNMOUNT:
- return (gettext("\tunmount [-f] -a\n"
- "\tunmount [-f] <filesystem|mountpoint>\n"));
+ return (gettext("\tunmount [-f] "
+ "<-a | filesystem|mountpoint>\n"));
case HELP_UNSHARE:
- return (gettext("\tunshare [-f] -a\n"
- "\tunshare [-f] <filesystem|mountpoint>\n"));
+ return (gettext("\tunshare [-f] "
+ "<-a | filesystem|mountpoint>\n"));
+ case HELP_ALLOW:
+ return (gettext("\tallow [-ldug] "
+ "<\"everyone\"|user|group>[,...] <perm|@setname>[,...]\n"
+ "\t <filesystem|volume>\n"
+ "\tallow [-ld] -e <perm|@setname>[,...] "
+ "<filesystem|volume>\n"
+ "\tallow -c <perm|@setname>[,...] <filesystem|volume>\n"
+ "\tallow -s @setname <perm|@setname>[,...] "
+ "<filesystem|volume>\n"));
+ case HELP_UNALLOW:
+ return (gettext("\tunallow [-rldug] "
+ "<\"everyone\"|user|group>[,...]\n"
+ "\t [<perm|@setname>[,...]] <filesystem|volume>\n"
+ "\tunallow [-rld] -e [<perm|@setname>[,...]] "
+ "<filesystem|volume>\n"
+ "\tunallow [-r] -c [<perm|@setname>[,...]] "
+ "<filesystem|volume>\n"
+ "\tunallow [-r] -s @setname [<perm|@setname>[,...]] "
+ "<filesystem|volume>\n"));
}
abort();
@@ -250,20 +283,20 @@ safe_malloc(size_t size)
}
/*
- * Callback routinue that will print out information for each of the
+ * Callback routine that will print out information for each of
* the properties.
*/
-static zfs_prop_t
-usage_prop_cb(zfs_prop_t prop, void *cb)
+static int
+usage_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
- (void) fprintf(fp, "\t%-13s ", zfs_prop_to_name(prop));
+ (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop));
if (zfs_prop_readonly(prop))
- (void) fprintf(fp, " NO ");
+ (void) fprintf(fp, " NO ");
else
- (void) fprintf(fp, " YES ");
+ (void) fprintf(fp, "YES ");
if (zfs_prop_inheritable(prop))
(void) fprintf(fp, " YES ");
@@ -275,7 +308,7 @@ usage_prop_cb(zfs_prop_t prop, void *cb)
else
(void) fprintf(fp, "%s\n", zfs_prop_values(prop));
- return (ZFS_PROP_CONT);
+ return (ZPROP_CONT);
}
/*
@@ -288,6 +321,7 @@ usage(boolean_t requested)
{
int i;
boolean_t show_properties = B_FALSE;
+ boolean_t show_permissions = B_FALSE;
FILE *fp = requested ? stdout : stderr;
if (current_command == NULL) {
@@ -318,29 +352,46 @@ usage(boolean_t requested)
strcmp(current_command->name, "list") == 0))
show_properties = B_TRUE;
+ if (current_command != NULL &&
+ (strcmp(current_command->name, "allow") == 0 ||
+ strcmp(current_command->name, "unallow") == 0))
+ show_permissions = B_TRUE;
+
if (show_properties) {
(void) fprintf(fp,
gettext("\nThe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-13s %s %s %s\n\n",
+ (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n",
"PROPERTY", "EDIT", "INHERIT", "VALUES");
/* Iterate over all properties */
- (void) zfs_prop_iter(usage_prop_cb, fp, B_FALSE);
+ (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE,
+ ZFS_TYPE_DATASET);
(void) fprintf(fp, gettext("\nSizes are specified in bytes "
"with standard units such as K, M, G, etc.\n"));
- (void) fprintf(fp, gettext("\n\nUser-defined properties can "
+ (void) fprintf(fp, gettext("\nUser-defined properties can "
"be specified by using a name containing a colon (:).\n"));
+
+ } else if (show_permissions) {
+ (void) fprintf(fp,
+ gettext("\nThe following permissions are supported:\n"));
+
+ zfs_deleg_permissions();
} else {
/*
* TRANSLATION NOTE:
* "zfs set|get" must not be localised this is the
* command name and arguments.
*/
+
(void) fprintf(fp,
gettext("\nFor the property list, run: zfs set|get\n"));
+
+ (void) fprintf(fp,
+ gettext("\nFor the delegated permission list, run:"
+ " zfs allow|unallow\n"));
}
/*
@@ -354,69 +405,143 @@ usage(boolean_t requested)
exit(requested ? 0 : 2);
}
+static int
+parseprop(nvlist_t *props)
+{
+ char *propname = optarg;
+ char *propval, *strval;
+
+ if ((propval = strchr(propname, '=')) == NULL) {
+ (void) fprintf(stderr, gettext("missing "
+ "'=' for -o option\n"));
+ return (-1);
+ }
+ *propval = '\0';
+ propval++;
+ if (nvlist_lookup_string(props, propname, &strval) == 0) {
+ (void) fprintf(stderr, gettext("property '%s' "
+ "specified multiple times\n"), propname);
+ return (-1);
+ }
+ if (nvlist_add_string(props, propname, propval) != 0) {
+ (void) fprintf(stderr, gettext("internal "
+ "error: out of memory\n"));
+ return (-1);
+ }
+ return (0);
+
+}
+
/*
- * zfs clone <fs, snap, vol> fs
+ * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
*
* Given an existing dataset, create a writable copy whose initial contents
* are the same as the source. The newly created dataset maintains a
* dependency on the original; the original cannot be destroyed so long as
* the clone exists.
+ *
+ * The '-p' flag creates all the non-existing ancestors of the target first.
*/
static int
zfs_do_clone(int argc, char **argv)
{
- zfs_handle_t *zhp;
+ zfs_handle_t *zhp = NULL;
+ boolean_t parents = B_FALSE;
+ nvlist_t *props;
int ret;
+ int c;
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+ (void) fprintf(stderr, gettext("internal error: "
+ "out of memory\n"));
+ return (1);
+ }
/* check options */
- if (argc > 1 && argv[1][0] == '-') {
- (void) fprintf(stderr, gettext("invalid option '%c'\n"),
- argv[1][1]);
- usage(B_FALSE);
+ while ((c = getopt(argc, argv, "o:p")) != -1) {
+ switch (c) {
+ case 'o':
+ if (parseprop(props))
+ return (1);
+ break;
+ case 'p':
+ parents = B_TRUE;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ goto usage;
+ }
}
+ argc -= optind;
+ argv += optind;
+
/* check number of arguments */
- if (argc < 2) {
+ if (argc < 1) {
(void) fprintf(stderr, gettext("missing source dataset "
"argument\n"));
- usage(B_FALSE);
+ goto usage;
}
- if (argc < 3) {
+ if (argc < 2) {
(void) fprintf(stderr, gettext("missing target dataset "
"argument\n"));
- usage(B_FALSE);
+ goto usage;
}
- if (argc > 3) {
+ if (argc > 2) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
+ goto usage;
}
/* open the source dataset */
- if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
return (1);
+ if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM |
+ ZFS_TYPE_VOLUME)) {
+ /*
+ * Now create the ancestors of the target dataset. If the
+ * target already exists and '-p' option was used we should not
+ * complain.
+ */
+ if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM |
+ ZFS_TYPE_VOLUME))
+ return (0);
+ if (zfs_create_ancestors(g_zfs, argv[1]) != 0)
+ return (1);
+ }
+
/* pass to libzfs */
- ret = zfs_clone(zhp, argv[2], NULL);
+ ret = zfs_clone(zhp, argv[1], props);
/* create the mountpoint if necessary */
if (ret == 0) {
- zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY);
+ zfs_handle_t *clone;
+
+ clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET);
if (clone != NULL) {
if ((ret = zfs_mount(clone, NULL, 0)) == 0)
ret = zfs_share(clone);
zfs_close(clone);
}
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
}
zfs_close(zhp);
+ nvlist_free(props);
- return (ret == 0 ? 0 : 1);
+ return (!!ret);
+
+usage:
+ if (zhp)
+ zfs_close(zhp);
+ nvlist_free(props);
+ usage(B_FALSE);
+ return (-1);
}
/*
- * zfs create [-o prop=value] ... fs
- * zfs create [-s] [-b blocksize] [-o prop=value] ... -V vol size
+ * zfs create [-p] [-o prop=value] ... fs
+ * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size
*
* Create a new dataset. This command can be used to create filesystems
* and volumes. Snapshot creation is handled by 'zfs snapshot'.
@@ -424,7 +549,10 @@ zfs_do_clone(int argc, char **argv)
*
* The '-s' flag applies only to volumes, and indicates that we should not try
* to set the reservation for this volume. By default we set a reservation
- * equal to the size for any volume.
+ * equal to the size for any volume. For pools with SPA_VERSION >=
+ * SPA_VERSION_REFRESERVATION, we set a refreservation instead.
+ *
+ * The '-p' flag creates all the non-existing ancestors of the target first.
*/
static int
zfs_do_create(int argc, char **argv)
@@ -434,12 +562,12 @@ zfs_do_create(int argc, char **argv)
uint64_t volsize;
int c;
boolean_t noreserve = B_FALSE;
+ boolean_t bflag = B_FALSE;
+ boolean_t parents = B_FALSE;
int ret = 1;
- nvlist_t *props = NULL;
+ nvlist_t *props;
uint64_t intval;
- char *propname;
- char *propval = NULL;
- char *strval;
+ int canmount;
if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
(void) fprintf(stderr, gettext("internal error: "
@@ -448,7 +576,7 @@ zfs_do_create(int argc, char **argv)
}
/* check options */
- while ((c = getopt(argc, argv, ":V:b:so:")) != -1) {
+ while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) {
switch (c) {
case 'V':
type = ZFS_TYPE_VOLUME;
@@ -468,7 +596,11 @@ zfs_do_create(int argc, char **argv)
}
volsize = intval;
break;
+ case 'p':
+ parents = B_TRUE;
+ break;
case 'b':
+ bflag = B_TRUE;
if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
(void) fprintf(stderr, gettext("bad volume "
"block size '%s': %s\n"), optarg,
@@ -485,25 +617,8 @@ zfs_do_create(int argc, char **argv)
}
break;
case 'o':
- propname = optarg;
- if ((propval = strchr(propname, '=')) == NULL) {
- (void) fprintf(stderr, gettext("missing "
- "'=' for -o option\n"));
- goto error;
- }
- *propval = '\0';
- propval++;
- if (nvlist_lookup_string(props, propname,
- &strval) == 0) {
- (void) fprintf(stderr, gettext("property '%s' "
- "specified multiple times\n"), propname);
- goto error;
- }
- if (nvlist_add_string(props, propname, propval) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
+ if (parseprop(props))
goto error;
- }
break;
case 's':
noreserve = B_TRUE;
@@ -520,9 +635,9 @@ zfs_do_create(int argc, char **argv)
}
}
- if (noreserve && type != ZFS_TYPE_VOLUME) {
- (void) fprintf(stderr, gettext("'-s' can only be used when "
- "creating a volume\n"));
+ if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) {
+ (void) fprintf(stderr, gettext("'-s' and '-b' can only be "
+ "used when creating a volume\n"));
goto badusage;
}
@@ -540,46 +655,83 @@ zfs_do_create(int argc, char **argv)
goto badusage;
}
- if (type == ZFS_TYPE_VOLUME && !noreserve &&
- nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_RESERVATION),
- &strval) != 0) {
- if (nvlist_add_uint64(props,
- zfs_prop_to_name(ZFS_PROP_RESERVATION),
- volsize) != 0) {
- (void) fprintf(stderr, gettext("internal "
- "error: out of memory\n"));
- nvlist_free(props);
- return (1);
+ if (type == ZFS_TYPE_VOLUME && !noreserve) {
+ zpool_handle_t *zpool_handle;
+ uint64_t spa_version;
+ char *p;
+ zfs_prop_t resv_prop;
+ char *strval;
+
+ if (p = strchr(argv[0], '/'))
+ *p = '\0';
+ zpool_handle = zpool_open(g_zfs, argv[0]);
+ if (p != NULL)
+ *p = '/';
+ if (zpool_handle == NULL)
+ goto error;
+ spa_version = zpool_get_prop_int(zpool_handle,
+ ZPOOL_PROP_VERSION, NULL);
+ zpool_close(zpool_handle);
+ if (spa_version >= SPA_VERSION_REFRESERVATION)
+ resv_prop = ZFS_PROP_REFRESERVATION;
+ else
+ resv_prop = ZFS_PROP_RESERVATION;
+
+ if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop),
+ &strval) != 0) {
+ if (nvlist_add_uint64(props,
+ zfs_prop_to_name(resv_prop), volsize) != 0) {
+ (void) fprintf(stderr, gettext("internal "
+ "error: out of memory\n"));
+ nvlist_free(props);
+ return (1);
+ }
}
}
+ if (parents && zfs_name_valid(argv[0], type)) {
+ /*
+ * Now create the ancestors of target dataset. If the target
+ * already exists and '-p' option was used we should not
+ * complain.
+ */
+ if (zfs_dataset_exists(g_zfs, argv[0], type)) {
+ ret = 0;
+ goto error;
+ }
+ if (zfs_create_ancestors(g_zfs, argv[0]) != 0)
+ goto error;
+ }
+
/* pass to libzfs */
if (zfs_create(g_zfs, argv[0], type, props) != 0)
goto error;
- if (propval != NULL)
- *(propval - 1) = '=';
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
-
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
goto error;
+ /*
+ * if the user doesn't want the dataset automatically mounted,
+ * then skip the mount/share step
+ */
+
+ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
/*
* Mount and/or share the new filesystem as appropriate. We provide a
* verbose error message to let the user know that their filesystem was
* in fact created, even if we failed to mount or share it.
*/
- if (zfs_mount(zhp, NULL, 0) != 0) {
- (void) fprintf(stderr, gettext("filesystem successfully "
- "created, but not mounted\n"));
- ret = 1;
- } else if (zfs_share(zhp) != 0) {
- (void) fprintf(stderr, gettext("filesystem successfully "
- "created, but not shared\n"));
- ret = 1;
- } else {
- ret = 0;
+ ret = 0;
+ if (canmount == ZFS_CANMOUNT_ON) {
+ if (zfs_mount(zhp, NULL, 0) != 0) {
+ (void) fprintf(stderr, gettext("filesystem "
+ "successfully created, but not mounted\n"));
+ ret = 1;
+ } else if (zfs_share(zhp) != 0) {
+ (void) fprintf(stderr, gettext("filesystem "
+ "successfully created, but not shared\n"));
+ ret = 1;
+ }
}
error:
@@ -789,7 +941,7 @@ zfs_do_destroy(int argc, char **argv)
int ret;
*cp = '\0';
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
return (1);
*cp = '@';
cp++;
@@ -807,16 +959,13 @@ zfs_do_destroy(int argc, char **argv)
if (ret) {
(void) fprintf(stderr,
gettext("no snapshots destroyed\n"));
- } else {
- zpool_log_history(g_zfs, argc + optind, argv - optind,
- argv[0], B_FALSE, B_FALSE);
}
return (ret != 0);
}
/* Open the given dataset */
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
return (1);
cb.cb_target = zhp;
@@ -849,7 +998,6 @@ zfs_do_destroy(int argc, char **argv)
return (1);
}
-
if (cb.cb_error ||
zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
zfs_close(zhp);
@@ -860,11 +1008,10 @@ zfs_do_destroy(int argc, char **argv)
* Do the real thing. The callback will close the handle regardless of
* whether it succeeds or not.
*/
+
if (destroy_callback(zhp, &cb) != 0)
return (1);
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
return (0);
}
@@ -894,11 +1041,11 @@ static int
get_callback(zfs_handle_t *zhp, void *data)
{
char buf[ZFS_MAXPROPLEN];
- zfs_source_t sourcetype;
+ zprop_source_t sourcetype;
char source[ZFS_MAXNAMELEN];
- libzfs_get_cbdata_t *cbp = data;
+ zprop_get_cbdata_t *cbp = data;
nvlist_t *userprop = zfs_get_user_props(zhp);
- zfs_proplist_t *pl = cbp->cb_proplist;
+ zprop_list_t *pl = cbp->cb_proplist;
nvlist_t *propval;
char *strval;
char *sourceval;
@@ -912,7 +1059,7 @@ get_callback(zfs_handle_t *zhp, void *data)
pl == cbp->cb_proplist)
continue;
- if (pl->pl_prop != ZFS_PROP_INVAL) {
+ if (pl->pl_prop != ZPROP_INVAL) {
if (zfs_prop_get(zhp, pl->pl_prop, buf,
sizeof (buf), &sourcetype, source,
sizeof (source),
@@ -920,17 +1067,17 @@ get_callback(zfs_handle_t *zhp, void *data)
if (pl->pl_all)
continue;
if (!zfs_prop_valid_for_type(pl->pl_prop,
- ZFS_TYPE_ANY)) {
+ ZFS_TYPE_DATASET)) {
(void) fprintf(stderr,
gettext("No such property '%s'\n"),
zfs_prop_to_name(pl->pl_prop));
continue;
}
- sourcetype = ZFS_SRC_NONE;
+ sourcetype = ZPROP_SRC_NONE;
(void) strlcpy(buf, "-", sizeof (buf));
}
- libzfs_print_one_property(zfs_get_name(zhp), cbp,
+ zprop_print_one_property(zfs_get_name(zhp), cbp,
zfs_prop_to_name(pl->pl_prop),
buf, sourcetype, source);
} else {
@@ -938,25 +1085,25 @@ get_callback(zfs_handle_t *zhp, void *data)
pl->pl_user_prop, &propval) != 0) {
if (pl->pl_all)
continue;
- sourcetype = ZFS_SRC_NONE;
+ sourcetype = ZPROP_SRC_NONE;
strval = "-";
} else {
verify(nvlist_lookup_string(propval,
- ZFS_PROP_VALUE, &strval) == 0);
+ ZPROP_VALUE, &strval) == 0);
verify(nvlist_lookup_string(propval,
- ZFS_PROP_SOURCE, &sourceval) == 0);
+ ZPROP_SOURCE, &sourceval) == 0);
if (strcmp(sourceval,
zfs_get_name(zhp)) == 0) {
- sourcetype = ZFS_SRC_LOCAL;
+ sourcetype = ZPROP_SRC_LOCAL;
} else {
- sourcetype = ZFS_SRC_INHERITED;
+ sourcetype = ZPROP_SRC_INHERITED;
(void) strlcpy(source,
sourceval, sizeof (source));
}
}
- libzfs_print_one_property(zfs_get_name(zhp), cbp,
+ zprop_print_one_property(zfs_get_name(zhp), cbp,
pl->pl_user_prop, strval, sourcetype,
source);
}
@@ -968,21 +1115,21 @@ get_callback(zfs_handle_t *zhp, void *data)
static int
zfs_do_get(int argc, char **argv)
{
- libzfs_get_cbdata_t cb = { 0 };
- boolean_t recurse = B_FALSE;
- int i, c;
+ zprop_get_cbdata_t cb = { 0 };
+ int i, c, flags = 0;
char *value, *fields;
int ret;
- zfs_proplist_t fake_name = { 0 };
+ zprop_list_t fake_name = { 0 };
/*
* Set up default columns and sources.
*/
- cb.cb_sources = ZFS_SRC_ALL;
+ cb.cb_sources = ZPROP_SRC_ALL;
cb.cb_columns[0] = GET_COL_NAME;
cb.cb_columns[1] = GET_COL_PROPERTY;
cb.cb_columns[2] = GET_COL_VALUE;
cb.cb_columns[3] = GET_COL_SOURCE;
+ cb.cb_type = ZFS_TYPE_DATASET;
/* check options */
while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) {
@@ -991,7 +1138,7 @@ zfs_do_get(int argc, char **argv)
cb.cb_literal = B_TRUE;
break;
case 'r':
- recurse = B_TRUE;
+ flags |= ZFS_ITER_RECURSE;
break;
case 'H':
cb.cb_scripted = B_TRUE;
@@ -1053,19 +1200,19 @@ zfs_do_get(int argc, char **argv)
switch (getsubopt(&optarg, source_subopts,
&value)) {
case 0:
- cb.cb_sources |= ZFS_SRC_LOCAL;
+ cb.cb_sources |= ZPROP_SRC_LOCAL;
break;
case 1:
- cb.cb_sources |= ZFS_SRC_DEFAULT;
+ cb.cb_sources |= ZPROP_SRC_DEFAULT;
break;
case 2:
- cb.cb_sources |= ZFS_SRC_INHERITED;
+ cb.cb_sources |= ZPROP_SRC_INHERITED;
break;
case 3:
- cb.cb_sources |= ZFS_SRC_TEMPORARY;
+ cb.cb_sources |= ZPROP_SRC_TEMPORARY;
break;
case 4:
- cb.cb_sources |= ZFS_SRC_NONE;
+ cb.cb_sources |= ZPROP_SRC_NONE;
break;
default:
(void) fprintf(stderr,
@@ -1094,7 +1241,8 @@ zfs_do_get(int argc, char **argv)
fields = argv[0];
- if (zfs_get_proplist(g_zfs, fields, &cb.cb_proplist) != 0)
+ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET)
+ != 0)
usage(B_FALSE);
argc--;
@@ -1118,13 +1266,13 @@ zfs_do_get(int argc, char **argv)
cb.cb_first = B_TRUE;
/* run for each object */
- ret = zfs_for_each(argc, argv, recurse, ZFS_TYPE_ANY, NULL,
- &cb.cb_proplist, get_callback, &cb, B_FALSE);
+ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL,
+ &cb.cb_proplist, get_callback, &cb);
if (cb.cb_proplist == &fake_name)
- zfs_free_proplist(fake_name.pl_next);
+ zprop_free_list(fake_name.pl_next);
else
- zfs_free_proplist(cb.cb_proplist);
+ zprop_free_list(cb.cb_proplist);
return (ret);
}
@@ -1140,37 +1288,46 @@ zfs_do_get(int argc, char **argv)
* useful for setting a property on a hierarchy-wide basis, regardless of any
* local modifications for each dataset.
*/
-typedef struct inherit_cbdata {
- char *cb_propname;
- boolean_t cb_any_successful;
-} inherit_cbdata_t;
static int
-inherit_callback(zfs_handle_t *zhp, void *data)
+inherit_recurse_cb(zfs_handle_t *zhp, void *data)
{
- inherit_cbdata_t *cbp = data;
- int ret;
+ char *propname = data;
+ zfs_prop_t prop = zfs_name_to_prop(propname);
- ret = zfs_prop_inherit(zhp, cbp->cb_propname);
- if (ret == 0)
- cbp->cb_any_successful = B_TRUE;
- return (ret != 0);
+ /*
+ * If we're doing it recursively, then ignore properties that
+ * are not valid for this type of dataset.
+ */
+ if (prop != ZPROP_INVAL &&
+ !zfs_prop_valid_for_type(prop, zfs_get_type(zhp)))
+ return (0);
+
+ return (zfs_prop_inherit(zhp, propname) != 0);
+}
+
+static int
+inherit_cb(zfs_handle_t *zhp, void *data)
+{
+ char *propname = data;
+
+ return (zfs_prop_inherit(zhp, propname) != 0);
}
static int
zfs_do_inherit(int argc, char **argv)
{
- boolean_t recurse = B_FALSE;
int c;
zfs_prop_t prop;
- inherit_cbdata_t cb;
+ char *propname;
int ret;
+ int flags = 0;
/* check options */
while ((c = getopt(argc, argv, "r")) != -1) {
switch (c) {
case 'r':
- recurse = B_TRUE;
+ flags |= ZFS_ITER_RECURSE;
break;
case '?':
default:
@@ -1193,42 +1350,265 @@ zfs_do_inherit(int argc, char **argv)
usage(B_FALSE);
}
- cb.cb_propname = argv[0];
+ propname = argv[0];
argc--;
argv++;
- if ((prop = zfs_name_to_prop(cb.cb_propname)) != ZFS_PROP_INVAL) {
+ if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
if (zfs_prop_readonly(prop)) {
(void) fprintf(stderr, gettext(
"%s property is read-only\n"),
- cb.cb_propname);
+ propname);
return (1);
}
if (!zfs_prop_inheritable(prop)) {
(void) fprintf(stderr, gettext("'%s' property cannot "
- "be inherited\n"), cb.cb_propname);
+ "be inherited\n"), propname);
if (prop == ZFS_PROP_QUOTA ||
- prop == ZFS_PROP_RESERVATION)
+ prop == ZFS_PROP_RESERVATION ||
+ prop == ZFS_PROP_REFQUOTA ||
+ prop == ZFS_PROP_REFRESERVATION)
(void) fprintf(stderr, gettext("use 'zfs set "
- "%s=none' to clear\n"), cb.cb_propname);
+ "%s=none' to clear\n"), propname);
return (1);
}
- } else if (!zfs_prop_user(cb.cb_propname)) {
- (void) fprintf(stderr, gettext(
- "invalid property '%s'\n"),
- cb.cb_propname);
+ } else if (!zfs_prop_user(propname)) {
+ (void) fprintf(stderr, gettext("invalid property '%s'\n"),
+ propname);
usage(B_FALSE);
}
- cb.cb_any_successful = B_FALSE;
+ if (flags & ZFS_ITER_RECURSE) {
+ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
+ NULL, NULL, inherit_recurse_cb, propname);
+ } else {
+ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
+ NULL, NULL, inherit_cb, propname);
+ }
+
+ return (ret);
+}
+
+typedef struct upgrade_cbdata {
+ uint64_t cb_numupgraded;
+ uint64_t cb_numsamegraded;
+ uint64_t cb_numfailed;
+ uint64_t cb_version;
+ boolean_t cb_newer;
+ boolean_t cb_foundone;
+ char cb_lastfs[ZFS_MAXNAMELEN];
+} upgrade_cbdata_t;
+
+static int
+same_pool(zfs_handle_t *zhp, const char *name)
+{
+ int len1 = strcspn(name, "/@");
+ const char *zhname = zfs_get_name(zhp);
+ int len2 = strcspn(zhname, "/@");
+
+ if (len1 != len2)
+ return (B_FALSE);
+ return (strncmp(name, zhname, len1) == 0);
+}
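
For reference, a minimal standalone sketch (not part of this change) of the comparison same_pool() performs: each dataset name is truncated at the first '/' or '@' and only the leading pool components are compared. The helper name and the sample dataset names below are invented for the example.

#include <stdio.h>
#include <string.h>

/* Compare only the pool component (text before the first '/' or '@'). */
static int
pool_names_match(const char *a, const char *b)
{
	size_t la = strcspn(a, "/@");
	size_t lb = strcspn(b, "/@");

	return (la == lb && strncmp(a, b, la) == 0);
}

int
main(void)
{
	/* "tank/home@snap" and "tank/var" share the pool "tank": prints 1. */
	(void) printf("%d\n", pool_names_match("tank/home@snap", "tank/var"));
	/* "tank/home" and "backup/home" do not: prints 0. */
	(void) printf("%d\n", pool_names_match("tank/home", "backup/home"));
	return (0);
}
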
+
+static int
+upgrade_list_callback(zfs_handle_t *zhp, void *data)
+{
+ upgrade_cbdata_t *cb = data;
+ int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+
+ /* list if it's old/new */
+ if ((!cb->cb_newer && version < ZPL_VERSION) ||
+ (cb->cb_newer && version > ZPL_VERSION)) {
+ char *str;
+ if (cb->cb_newer) {
+ str = gettext("The following filesystems are "
+ "formatted using a newer software version and\n"
+ "cannot be accessed on the current system.\n\n");
+ } else {
+ str = gettext("The following filesystems are "
+ "out of date, and can be upgraded. After being\n"
+ "upgraded, these filesystems (and any 'zfs send' "
+ "streams generated from\n"
+ "subsequent snapshots) will no longer be "
+ "accessible by older software versions.\n\n");
+ }
+
+ if (!cb->cb_foundone) {
+ (void) puts(str);
+ (void) printf(gettext("VER FILESYSTEM\n"));
+ (void) printf(gettext("--- ------------\n"));
+ cb->cb_foundone = B_TRUE;
+ }
+
+ (void) printf("%2u %s\n", version, zfs_get_name(zhp));
+ }
+
+ return (0);
+}
+
+static int
+upgrade_set_callback(zfs_handle_t *zhp, void *data)
+{
+ upgrade_cbdata_t *cb = data;
+ int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+
+ if (cb->cb_version >= ZPL_VERSION_FUID) {
+ int spa_version;
+
+ if (zfs_spa_version(zhp, &spa_version) < 0)
+ return (-1);
+
+ if (spa_version < SPA_VERSION_FUID) {
+ /* can't upgrade */
+ (void) printf(gettext("%s: can not be upgraded; "
+ "the pool version needs to first be upgraded\nto "
+ "version %d\n\n"),
+ zfs_get_name(zhp), SPA_VERSION_FUID);
+ cb->cb_numfailed++;
+ return (0);
+ }
+ }
+
+ /* upgrade */
+ if (version < cb->cb_version) {
+ char verstr[16];
+ (void) snprintf(verstr, sizeof (verstr),
+ "%llu", cb->cb_version);
+ if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) {
+ /*
+ * If they did "zfs upgrade -a", then we could
+ * be doing ioctls to different pools. We need
+ * to log this history once to each pool.
+ */
+ verify(zpool_stage_history(g_zfs, history_str) == 0);
+ }
+ if (zfs_prop_set(zhp, "version", verstr) == 0)
+ cb->cb_numupgraded++;
+ else
+ cb->cb_numfailed++;
+ (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp));
+ } else if (version > cb->cb_version) {
+ /* can't downgrade */
+ (void) printf(gettext("%s: can not be downgraded; "
+ "it is already at version %u\n"),
+ zfs_get_name(zhp), version);
+ cb->cb_numfailed++;
+ } else {
+ cb->cb_numsamegraded++;
+ }
+ return (0);
+}
- ret = zfs_for_each(argc, argv, recurse,
- ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL,
- inherit_callback, &cb, B_FALSE);
+/*
+ * zfs upgrade
+ * zfs upgrade -v
+ * zfs upgrade [-r] [-V <version>] <-a | filesystem>
+ */
+static int
+zfs_do_upgrade(int argc, char **argv)
+{
+ boolean_t all = B_FALSE;
+ boolean_t showversions = B_FALSE;
+ int ret;
+ upgrade_cbdata_t cb = { 0 };
+ char c;
+ int flags = ZFS_ITER_ARGS_CAN_BE_PATHS;
- if (cb.cb_any_successful) {
- zpool_log_history(g_zfs, argc + optind + 1, argv - optind - 1,
- argv[0], B_FALSE, B_FALSE);
+ /* check options */
+ while ((c = getopt(argc, argv, "rvV:a")) != -1) {
+ switch (c) {
+ case 'r':
+ flags |= ZFS_ITER_RECURSE;
+ break;
+ case 'v':
+ showversions = B_TRUE;
+ break;
+ case 'V':
+ if (zfs_prop_string_to_index(ZFS_PROP_VERSION,
+ optarg, &cb.cb_version) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid version %s\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
+ case 'a':
+ all = B_TRUE;
+ break;
+ case '?':
+ default:
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version))
+ usage(B_FALSE);
+ if (showversions && (flags & ZFS_ITER_RECURSE || all ||
+ cb.cb_version || argc))
+ usage(B_FALSE);
+ if ((all || argc) && (showversions))
+ usage(B_FALSE);
+ if (all && argc)
+ usage(B_FALSE);
+
+ if (showversions) {
+ /* Show info on available versions. */
+ (void) printf(gettext("The following filesystem versions are "
+ "supported:\n\n"));
+ (void) printf(gettext("VER DESCRIPTION\n"));
+ (void) printf("--- -----------------------------------------"
+ "---------------\n");
+ (void) printf(gettext(" 1 Initial ZFS filesystem version\n"));
+ (void) printf(gettext(" 2 Enhanced directory entries\n"));
+ (void) printf(gettext(" 3 Case insensitive and File system "
+ "unique identifer (FUID)\n"));
+ (void) printf(gettext("\nFor more information on a particular "
+ "version, including supported releases, see:\n\n"));
+ (void) printf("http://www.opensolaris.org/os/community/zfs/"
+ "version/zpl/N\n\n");
+ (void) printf(gettext("Where 'N' is the version number.\n"));
+ ret = 0;
+ } else if (argc || all) {
+ /* Upgrade filesystems */
+ if (cb.cb_version == 0)
+ cb.cb_version = ZPL_VERSION;
+ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM,
+ NULL, NULL, upgrade_set_callback, &cb);
+ (void) printf(gettext("%llu filesystems upgraded\n"),
+ cb.cb_numupgraded);
+ if (cb.cb_numsamegraded) {
+ (void) printf(gettext("%llu filesystems already at "
+ "this version\n"),
+ cb.cb_numsamegraded);
+ }
+ if (cb.cb_numfailed != 0)
+ ret = 1;
+ } else {
+ /* List old-version filesystems */
+ boolean_t found;
+ (void) printf(gettext("This system is currently running "
+ "ZFS filesystem version %llu.\n\n"), ZPL_VERSION);
+
+ flags |= ZFS_ITER_RECURSE;
+ ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM,
+ NULL, NULL, upgrade_list_callback, &cb);
+
+ found = cb.cb_foundone;
+ cb.cb_foundone = B_FALSE;
+ cb.cb_newer = B_TRUE;
+
+ ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM,
+ NULL, NULL, upgrade_list_callback, &cb);
+
+ if (!cb.cb_foundone && !found) {
+ (void) printf(gettext("All filesystems are "
+ "formatted with the current version.\n"));
+ }
}
return (ret);
@@ -1240,7 +1620,7 @@ zfs_do_inherit(int argc, char **argv)
* <dataset> ...
*
* -r Recurse over all children
- * -H Scripted mode; elide headers and separate colums by tabs
+ * -H Scripted mode; elide headers and separate columns by tabs
* -o Control which fields to display.
* -t Control which object types to display.
* -s Specify sort columns, descending order.
@@ -1253,14 +1633,14 @@ zfs_do_inherit(int argc, char **argv)
typedef struct list_cbdata {
boolean_t cb_first;
boolean_t cb_scripted;
- zfs_proplist_t *cb_proplist;
+ zprop_list_t *cb_proplist;
} list_cbdata_t;
/*
* Given a list of columns to display, output appropriate headers for each one.
*/
static void
-print_header(zfs_proplist_t *pl)
+print_header(zprop_list_t *pl)
{
char headerbuf[ZFS_MAXPROPLEN];
const char *header;
@@ -1276,7 +1656,7 @@ print_header(zfs_proplist_t *pl)
}
right_justify = B_FALSE;
- if (pl->pl_prop != ZFS_PROP_INVAL) {
+ if (pl->pl_prop != ZPROP_INVAL) {
header = zfs_prop_column_name(pl->pl_prop);
right_justify = zfs_prop_align_right(pl->pl_prop);
} else {
@@ -1302,7 +1682,7 @@ print_header(zfs_proplist_t *pl)
* to the described layout.
*/
static void
-print_dataset(zfs_handle_t *zhp, zfs_proplist_t *pl, int scripted)
+print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
{
boolean_t first = B_TRUE;
char property[ZFS_MAXPROPLEN];
@@ -1323,7 +1703,7 @@ print_dataset(zfs_handle_t *zhp, zfs_proplist_t *pl, int scripted)
}
right_justify = B_FALSE;
- if (pl->pl_prop != ZFS_PROP_INVAL) {
+ if (pl->pl_prop != ZPROP_INVAL) {
if (zfs_prop_get(zhp, pl->pl_prop, property,
sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
propstr = "-";
@@ -1337,7 +1717,7 @@ print_dataset(zfs_handle_t *zhp, zfs_proplist_t *pl, int scripted)
propstr = "-";
else
verify(nvlist_lookup_string(propval,
- ZFS_PROP_VALUE, &propstr) == 0);
+ ZPROP_VALUE, &propstr) == 0);
}
width = pl->pl_width;
@@ -1381,18 +1761,17 @@ static int
zfs_do_list(int argc, char **argv)
{
int c;
- boolean_t recurse = B_FALSE;
boolean_t scripted = B_FALSE;
static char default_fields[] =
"name,used,available,referenced,mountpoint";
- int types = ZFS_TYPE_ANY;
+ int types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME;
+ boolean_t types_specified = B_FALSE;
char *fields = NULL;
- char *basic_fields = default_fields;
list_cbdata_t cb = { 0 };
char *value;
int ret;
- char *type_subopts[] = { "filesystem", "volume", "snapshot", NULL };
zfs_sort_column_t *sortcol = NULL;
+ int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS;
/* check options */
while ((c = getopt(argc, argv, ":o:rt:Hs:S:")) != -1) {
@@ -1401,7 +1780,7 @@ zfs_do_list(int argc, char **argv)
fields = optarg;
break;
case 'r':
- recurse = B_TRUE;
+ flags |= ZFS_ITER_RECURSE;
break;
case 'H':
scripted = B_TRUE;
@@ -1424,7 +1803,12 @@ zfs_do_list(int argc, char **argv)
break;
case 't':
types = 0;
+ types_specified = B_TRUE;
+ flags &= ~ZFS_ITER_PROP_LISTSNAPS;
while (*optarg != '\0') {
+ static char *type_subopts[] = { "filesystem",
+ "volume", "snapshot", "all", NULL };
+
switch (getsubopt(&optarg, type_subopts,
&value)) {
case 0:
@@ -1436,6 +1820,10 @@ zfs_do_list(int argc, char **argv)
case 2:
types |= ZFS_TYPE_SNAPSHOT;
break;
+ case 3:
+ types = ZFS_TYPE_DATASET;
+ break;
+
default:
(void) fprintf(stderr,
gettext("invalid type '%s'\n"),
@@ -1460,35 +1848,46 @@ zfs_do_list(int argc, char **argv)
argv += optind;
if (fields == NULL)
- fields = basic_fields;
+ fields = default_fields;
/*
- * If the user specifies '-o all', the zfs_get_proplist() doesn't
+ * If "-o space" and no types were specified, don't display snapshots.
+ */
+ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE)
+ types &= ~ZFS_TYPE_SNAPSHOT;
+
+ /*
+ * If the user specifies '-o all', the zprop_get_list() doesn't
* normally include the name of the dataset. For 'zfs list', we always
* want this property to be first.
*/
- if (zfs_get_proplist(g_zfs, fields, &cb.cb_proplist) != 0)
+ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET)
+ != 0)
usage(B_FALSE);
cb.cb_scripted = scripted;
cb.cb_first = B_TRUE;
- ret = zfs_for_each(argc, argv, recurse, types, sortcol, &cb.cb_proplist,
- list_callback, &cb, B_TRUE);
+ ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist,
+ list_callback, &cb);
- zfs_free_proplist(cb.cb_proplist);
+ zprop_free_list(cb.cb_proplist);
zfs_free_sort_columns(sortcol);
- if (ret == 0 && cb.cb_first)
+ if (ret == 0 && cb.cb_first && !cb.cb_scripted)
(void) printf(gettext("no datasets available\n"));
return (ret);
}
/*
- * zfs rename [-r] <fs | snap | vol> <fs | snap | vol>
+ * zfs rename <fs | snap | vol> <fs | snap | vol>
+ * zfs rename -p <fs | vol> <fs | vol>
+ * zfs rename -r <snap> <snap>
*
* Renames the given dataset to another of the same type.
+ *
+ * The '-p' flag creates all the non-existing ancestors of the target first.
*/
/* ARGSUSED */
static int
@@ -1497,13 +1896,17 @@ zfs_do_rename(int argc, char **argv)
zfs_handle_t *zhp;
int c;
int ret;
- int recurse = 0;
+ boolean_t recurse = B_FALSE;
+ boolean_t parents = B_FALSE;
/* check options */
- while ((c = getopt(argc, argv, "r")) != -1) {
+ while ((c = getopt(argc, argv, "pr")) != -1) {
switch (c) {
+ case 'p':
+ parents = B_TRUE;
+ break;
case 'r':
- recurse = 1;
+ recurse = B_TRUE;
break;
case '?':
default:
@@ -1532,20 +1935,30 @@ zfs_do_rename(int argc, char **argv)
usage(B_FALSE);
}
+ if (recurse && parents) {
+ (void) fprintf(stderr, gettext("-p and -r options are mutually "
+ "exclusive\n"));
+ usage(B_FALSE);
+ }
+
if (recurse && strchr(argv[0], '@') == 0) {
(void) fprintf(stderr, gettext("source dataset for recursive "
"rename must be a snapshot\n"));
usage(B_FALSE);
}
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
+ if ((zhp = zfs_open(g_zfs, argv[0], parents ? ZFS_TYPE_FILESYSTEM |
+ ZFS_TYPE_VOLUME : ZFS_TYPE_DATASET)) == NULL)
return (1);
- ret = (zfs_rename(zhp, argv[1], recurse) != 0);
+ /* If we were asked and the name looks good, try to create ancestors. */
+ if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) &&
+ zfs_create_ancestors(g_zfs, argv[1]) != 0) {
+ zfs_close(zhp);
+ return (1);
+ }
- if (!ret)
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[1],
- B_FALSE, B_FALSE);
+ ret = (zfs_rename(zhp, argv[1], recurse) != 0);
zfs_close(zhp);
return (ret);
@@ -1587,19 +2000,17 @@ zfs_do_promote(int argc, char **argv)
ret = (zfs_promote(zhp) != 0);
- if (!ret)
- zpool_log_history(g_zfs, argc, argv, argv[1], B_FALSE, B_FALSE);
zfs_close(zhp);
return (ret);
}
/*
- * zfs rollback [-rfR] <snapshot>
+ * zfs rollback [-rRf] <snapshot>
*
* -r Delete any intervening snapshots before doing rollback
* -R Delete any snapshots and their clones
- * -f Force unmount filesystems, even if they are in use.
+ * -f Ignored for backwards compatibility
*
* Given a filesystem, rollback to a specific snapshot, discarding any changes
* since then and making it the active dataset. If more recent snapshots exist,
@@ -1686,18 +2097,15 @@ zfs_do_rollback(int argc, char **argv)
{
int ret;
int c;
+ boolean_t force = B_FALSE;
rollback_cbdata_t cb = { 0 };
zfs_handle_t *zhp, *snap;
char parentname[ZFS_MAXNAMELEN];
char *delim;
- int force = 0;
/* check options */
- while ((c = getopt(argc, argv, "rfR")) != -1) {
+ while ((c = getopt(argc, argv, "rRf")) != -1) {
switch (c) {
- case 'f':
- force = 1;
- break;
case 'r':
cb.cb_recurse = 1;
break;
@@ -1705,6 +2113,9 @@ zfs_do_rollback(int argc, char **argv)
cb.cb_recurse = 1;
cb.cb_doclones = 1;
break;
+ case 'f':
+ force = B_TRUE;
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -1733,7 +2144,7 @@ zfs_do_rollback(int argc, char **argv)
(void) strlcpy(parentname, argv[0], sizeof (parentname));
verify((delim = strrchr(parentname, '@')) != NULL);
*delim = '\0';
- if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_ANY)) == NULL) {
+ if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) {
zfs_close(snap);
return (1);
}
@@ -1757,11 +2168,6 @@ zfs_do_rollback(int argc, char **argv)
*/
ret = zfs_rollback(zhp, snap, force);
- if (!ret) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
- }
-
out:
zfs_close(snap);
zfs_close(zhp);
@@ -1780,7 +2186,6 @@ out:
typedef struct set_cbdata {
char *cb_propname;
char *cb_value;
- boolean_t cb_any_successful;
} set_cbdata_t;
static int
@@ -1801,7 +2206,6 @@ set_callback(zfs_handle_t *zhp, void *data)
}
return (1);
}
- cbp->cb_any_successful = B_TRUE;
return (0);
}
@@ -1831,7 +2235,8 @@ zfs_do_set(int argc, char **argv)
/* validate property=value argument */
cb.cb_propname = argv[1];
- if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) {
+ if (((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) ||
+ (cb.cb_value[1] == '\0')) {
(void) fprintf(stderr, gettext("missing value in "
"property=value argument\n"));
usage(B_FALSE);
@@ -1839,7 +2244,6 @@ zfs_do_set(int argc, char **argv)
*cb.cb_value = '\0';
cb.cb_value++;
- cb.cb_any_successful = B_FALSE;
if (*cb.cb_propname == '\0') {
(void) fprintf(stderr,
@@ -1847,40 +2251,46 @@ zfs_do_set(int argc, char **argv)
usage(B_FALSE);
}
- ret = zfs_for_each(argc - 2, argv + 2, B_FALSE,
- ZFS_TYPE_ANY, NULL, NULL, set_callback, &cb, B_FALSE);
-
- if (cb.cb_any_successful) {
- *(cb.cb_value - 1) = '=';
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
- }
+ ret = zfs_for_each(argc - 2, argv + 2, NULL,
+ ZFS_TYPE_DATASET, NULL, NULL, set_callback, &cb);
return (ret);
}
/*
- * zfs snapshot [-r] <fs@snap>
+ * zfs snapshot [-r] [-o prop=value] ... <fs@snap>
*
* Creates a snapshot with the given name. While functionally equivalent to
- * 'zfs create', it is a separate command to diffferentiate intent.
+ * 'zfs create', it is a separate command to differentiate intent.
*/
static int
zfs_do_snapshot(int argc, char **argv)
{
- int recursive = B_FALSE;
+ boolean_t recursive = B_FALSE;
int ret;
char c;
+ nvlist_t *props;
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
+ (void) fprintf(stderr, gettext("internal error: "
+ "out of memory\n"));
+ return (1);
+ }
/* check options */
- while ((c = getopt(argc, argv, ":r")) != -1) {
+ while ((c = getopt(argc, argv, "ro:")) != -1) {
switch (c) {
+ case 'o':
+ if (parseprop(props))
+ return (1);
+ break;
case 'r':
recursive = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(B_FALSE);
+ goto usage;
}
}
@@ -1890,25 +2300,28 @@ zfs_do_snapshot(int argc, char **argv)
/* check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing snapshot argument\n"));
- usage(B_FALSE);
+ goto usage;
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
- usage(B_FALSE);
+ goto usage;
}
- ret = zfs_snapshot(g_zfs, argv[0], recursive);
+ ret = zfs_snapshot(g_zfs, argv[0], recursive, props);
+ nvlist_free(props);
if (ret && recursive)
(void) fprintf(stderr, gettext("no snapshots were created\n"));
- if (!ret) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
- }
return (ret != 0);
+
+usage:
+ nvlist_free(props);
+ usage(B_FALSE);
+ return (-1);
}
/*
- * zfs send [-i <@snap>] <fs@snap>
+ * zfs send [-v] -R [-i|-I <@snap>] <fs@snap>
+ * zfs send [-v] [-i|-I <@snap>] <fs@snap>
*
* Send a backup stream to stdout.
*/
@@ -1916,18 +2329,35 @@ static int
zfs_do_send(int argc, char **argv)
{
char *fromname = NULL;
+ char *toname = NULL;
char *cp;
zfs_handle_t *zhp;
+ boolean_t doall = B_FALSE;
+ boolean_t replicate = B_FALSE;
+ boolean_t fromorigin = B_FALSE;
+ boolean_t verbose = B_FALSE;
int c, err;
/* check options */
- while ((c = getopt(argc, argv, ":i:")) != -1) {
+ while ((c = getopt(argc, argv, ":i:I:Rv")) != -1) {
switch (c) {
case 'i':
if (fromname)
usage(B_FALSE);
fromname = optarg;
break;
+ case 'I':
+ if (fromname)
+ usage(B_FALSE);
+ fromname = optarg;
+ doall = B_TRUE;
+ break;
+ case 'R':
+ replicate = B_TRUE;
+ break;
+ case 'v':
+ verbose = B_TRUE;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@@ -1960,37 +2390,62 @@ zfs_do_send(int argc, char **argv)
return (1);
}
- if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+ cp = strchr(argv[0], '@');
+ if (cp == NULL) {
+ (void) fprintf(stderr,
+ gettext("argument must be a snapshot\n"));
+ usage(B_FALSE);
+ }
+ *cp = '\0';
+ toname = cp + 1;
+ zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+ if (zhp == NULL)
return (1);
/*
* If they specified the full path to the snapshot, chop off
- * everything except the short name of the snapshot.
+ * everything except the short name of the snapshot, but special
+ * case if they specify the origin.
*/
if (fromname && (cp = strchr(fromname, '@')) != NULL) {
- if (cp != fromname &&
- strncmp(argv[0], fromname, cp - fromname + 1)) {
- (void) fprintf(stderr,
- gettext("incremental source must be "
- "in same filesystem\n"));
- usage(B_FALSE);
- }
- fromname = cp + 1;
- if (strchr(fromname, '@') || strchr(fromname, '/')) {
- (void) fprintf(stderr,
- gettext("invalid incremental source\n"));
- usage(B_FALSE);
+ char origin[ZFS_MAXNAMELEN];
+ zprop_source_t src;
+
+ (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN,
+ origin, sizeof (origin), &src, NULL, 0, B_FALSE);
+
+ if (strcmp(origin, fromname) == 0) {
+ fromname = NULL;
+ fromorigin = B_TRUE;
+ } else {
+ *cp = '\0';
+ if (cp != fromname && strcmp(argv[0], fromname)) {
+ (void) fprintf(stderr,
+ gettext("incremental source must be "
+ "in same filesystem\n"));
+ usage(B_FALSE);
+ }
+ fromname = cp + 1;
+ if (strchr(fromname, '@') || strchr(fromname, '/')) {
+ (void) fprintf(stderr,
+ gettext("invalid incremental source\n"));
+ usage(B_FALSE);
+ }
}
}
- err = zfs_send(zhp, fromname, STDOUT_FILENO);
+ if (replicate && fromname == NULL)
+ doall = B_TRUE;
+
+ err = zfs_send(zhp, fromname, toname, replicate, doall, fromorigin,
+ verbose, STDOUT_FILENO);
zfs_close(zhp);
return (err != 0);
}
/*
- * zfs receive <fs@snap>
+ * zfs receive [-dnvF] <fs@snap>
*
* Restore a backup stream from stdin.
*/
@@ -1998,25 +2453,23 @@ static int
zfs_do_receive(int argc, char **argv)
{
int c, err;
- boolean_t isprefix = B_FALSE;
- boolean_t dryrun = B_FALSE;
- boolean_t verbose = B_FALSE;
- boolean_t force = B_FALSE;
+ recvflags_t flags;
+ bzero(&flags, sizeof (recvflags_t));
/* check options */
while ((c = getopt(argc, argv, ":dnvF")) != -1) {
switch (c) {
case 'd':
- isprefix = B_TRUE;
+ flags.isprefix = B_TRUE;
break;
case 'n':
- dryrun = B_TRUE;
+ flags.dryrun = B_TRUE;
break;
case 'v':
- verbose = B_TRUE;
+ flags.verbose = B_TRUE;
break;
case 'F':
- force = B_TRUE;
+ flags.force = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -2051,15 +2504,391 @@ zfs_do_receive(int argc, char **argv)
return (1);
}
- err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun, force,
- STDIN_FILENO);
+ err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL);
+
+ return (err != 0);
+}
+
+typedef struct allow_cb {
+ int a_permcnt;
+ size_t a_treeoffset;
+} allow_cb_t;
- if (!err) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[0],
- B_FALSE, B_FALSE);
+static void
+zfs_print_perms(avl_tree_t *tree)
+{
+ zfs_perm_node_t *permnode;
+
+ permnode = avl_first(tree);
+ while (permnode != NULL) {
+ (void) printf("%s", permnode->z_pname);
+ permnode = AVL_NEXT(tree, permnode);
+ if (permnode)
+ (void) printf(",");
+ else
+ (void) printf("\n");
}
+}
- return (err != 0);
+/*
+ * Iterate over users/groups/everyone/... and call zfs_print_perms() to
+ * print the actual permissions when a tree has more than zero nodes.
+ */
+static void
+zfs_iter_perms(avl_tree_t *tree, const char *banner, allow_cb_t *cb)
+{
+ zfs_allow_node_t *item;
+ avl_tree_t *ptree;
+
+ item = avl_first(tree);
+ while (item) {
+ ptree = (void *)((char *)item + cb->a_treeoffset);
+ if (avl_numnodes(ptree)) {
+ if (cb->a_permcnt++ == 0)
+ (void) printf("%s\n", banner);
+ (void) printf("\t%s", item->z_key);
+ /*
+ * Avoid an extra space being printed
+ * for "everyone" which is keyed with a null
+ * string
+ */
+ if (item->z_key[0] != '\0')
+ (void) printf(" ");
+ zfs_print_perms(ptree);
+ }
+ item = AVL_NEXT(tree, item);
+ }
+}
+
+#define LINES "-------------------------------------------------------------\n"
+static int
+zfs_print_allows(char *ds)
+{
+ zfs_allow_t *curperms, *perms;
+ zfs_handle_t *zhp;
+ allow_cb_t allowcb = { 0 };
+ char banner[MAXPATHLEN];
+
+ if (ds[0] == '-')
+ usage(B_FALSE);
+
+ if (strrchr(ds, '@')) {
+ (void) fprintf(stderr, gettext("Snapshots don't have 'allow'"
+ " permissions\n"));
+ return (1);
+ }
+ if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
+ return (1);
+
+ if (zfs_perm_get(zhp, &perms)) {
+ (void) fprintf(stderr,
+ gettext("Failed to retrieve 'allows' on %s\n"), ds);
+ zfs_close(zhp);
+ return (1);
+ }
+
+ zfs_close(zhp);
+
+ if (perms != NULL)
+ (void) printf("%s", LINES);
+ for (curperms = perms; curperms; curperms = curperms->z_next) {
+
+ (void) snprintf(banner, sizeof (banner),
+ "Permission sets on (%s)", curperms->z_setpoint);
+ allowcb.a_treeoffset =
+ offsetof(zfs_allow_node_t, z_localdescend);
+ allowcb.a_permcnt = 0;
+ zfs_iter_perms(&curperms->z_sets, banner, &allowcb);
+
+ (void) snprintf(banner, sizeof (banner),
+ "Create time permissions on (%s)", curperms->z_setpoint);
+ allowcb.a_treeoffset =
+ offsetof(zfs_allow_node_t, z_localdescend);
+ allowcb.a_permcnt = 0;
+ zfs_iter_perms(&curperms->z_crperms, banner, &allowcb);
+
+
+ (void) snprintf(banner, sizeof (banner),
+ "Local permissions on (%s)", curperms->z_setpoint);
+ allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_local);
+ allowcb.a_permcnt = 0;
+ zfs_iter_perms(&curperms->z_user, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_group, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
+
+ (void) snprintf(banner, sizeof (banner),
+ "Descendent permissions on (%s)", curperms->z_setpoint);
+ allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_descend);
+ allowcb.a_permcnt = 0;
+ zfs_iter_perms(&curperms->z_user, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_group, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
+
+ (void) snprintf(banner, sizeof (banner),
+ "Local+Descendent permissions on (%s)",
+ curperms->z_setpoint);
+ allowcb.a_treeoffset =
+ offsetof(zfs_allow_node_t, z_localdescend);
+ allowcb.a_permcnt = 0;
+ zfs_iter_perms(&curperms->z_user, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_group, banner, &allowcb);
+ zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
+
+ (void) printf("%s", LINES);
+ }
+ zfs_free_allows(perms);
+ return (0);
+}
+
+#define ALLOWOPTIONS "ldcsu:g:e"
+#define UNALLOWOPTIONS "ldcsu:g:er"
+
+/*
+ * Validate options, and build necessary datastructure to display/remove/add
+ * permissions.
+ * Returns 0 - If permissions should be added/removed
+ * Returns 1 - If permissions should be displayed.
+ * Returns -1 - on failure
+ */
+int
+parse_allow_args(int *argc, char **argv[], boolean_t unallow,
+ char **ds, int *recurse, nvlist_t **zperms)
+{
+ int c;
+ char *options = unallow ? UNALLOWOPTIONS : ALLOWOPTIONS;
+ zfs_deleg_inherit_t deleg_type = ZFS_DELEG_NONE;
+ zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN;
+ char *who = NULL;
+ char *perms = NULL;
+ zfs_handle_t *zhp;
+
+ while ((c = getopt(*argc, *argv, options)) != -1) {
+ switch (c) {
+ case 'l':
+ if (who_type == ZFS_DELEG_CREATE ||
+ who_type == ZFS_DELEG_NAMED_SET)
+ usage(B_FALSE);
+
+ deleg_type |= ZFS_DELEG_PERM_LOCAL;
+ break;
+ case 'd':
+ if (who_type == ZFS_DELEG_CREATE ||
+ who_type == ZFS_DELEG_NAMED_SET)
+ usage(B_FALSE);
+
+ deleg_type |= ZFS_DELEG_PERM_DESCENDENT;
+ break;
+ case 'r':
+ *recurse = B_TRUE;
+ break;
+ case 'c':
+ if (who_type != ZFS_DELEG_WHO_UNKNOWN)
+ usage(B_FALSE);
+ if (deleg_type)
+ usage(B_FALSE);
+ who_type = ZFS_DELEG_CREATE;
+ break;
+ case 's':
+ if (who_type != ZFS_DELEG_WHO_UNKNOWN)
+ usage(B_FALSE);
+ if (deleg_type)
+ usage(B_FALSE);
+ who_type = ZFS_DELEG_NAMED_SET;
+ break;
+ case 'u':
+ if (who_type != ZFS_DELEG_WHO_UNKNOWN)
+ usage(B_FALSE);
+ who_type = ZFS_DELEG_USER;
+ who = optarg;
+ break;
+ case 'g':
+ if (who_type != ZFS_DELEG_WHO_UNKNOWN)
+ usage(B_FALSE);
+ who_type = ZFS_DELEG_GROUP;
+ who = optarg;
+ break;
+ case 'e':
+ if (who_type != ZFS_DELEG_WHO_UNKNOWN)
+ usage(B_FALSE);
+ who_type = ZFS_DELEG_EVERYONE;
+ break;
+ default:
+ usage(B_FALSE);
+ break;
+ }
+ }
+
+ if (deleg_type == 0)
+ deleg_type = ZFS_DELEG_PERM_LOCALDESCENDENT;
+
+ *argc -= optind;
+ *argv += optind;
+
+ if (unallow == B_FALSE && *argc == 1) {
+ /*
+ * Only print permissions if no options were processed
+ */
+ if (optind == 1)
+ return (1);
+ else
+ usage(B_FALSE);
+ }
+
+ /*
+ * initialize variables for zfs_build_perms based on number
+ * of arguments.
+ * 3 arguments ==> zfs [un]allow joe perm,perm,perm <dataset> or
+ * zfs [un]allow -s @set1 perm,perm <dataset>
+ * 2 arguments ==> zfs [un]allow -c perm,perm <dataset> or
+ * zfs [un]allow -u|-g <name> perm <dataset> or
+ * zfs [un]allow -e perm,perm <dataset>
+ * zfs unallow joe <dataset>
+ * zfs unallow -s @set1 <dataset>
+ * 1 argument ==> zfs [un]allow -e <dataset> or
+ * zfs [un]allow -c <dataset>
+ */
+
+ switch (*argc) {
+ case 3:
+ perms = (*argv)[1];
+ who = (*argv)[0];
+ *ds = (*argv)[2];
+
+ /*
+ * advance argc/argv for do_allow cases.
+ * for the do_allow case, make sure 'who' has a known who type
+ * and is not a permission set.
+ */
+ if (unallow == B_TRUE) {
+ *argc -= 2;
+ *argv += 2;
+ } else if (who_type != ZFS_DELEG_WHO_UNKNOWN &&
+ who_type != ZFS_DELEG_NAMED_SET)
+ usage(B_FALSE);
+ break;
+
+ case 2:
+ if (unallow == B_TRUE && (who_type == ZFS_DELEG_EVERYONE ||
+ who_type == ZFS_DELEG_CREATE || who != NULL)) {
+ perms = (*argv)[0];
+ *ds = (*argv)[1];
+ } else {
+ if (unallow == B_FALSE &&
+ (who_type == ZFS_DELEG_WHO_UNKNOWN ||
+ who_type == ZFS_DELEG_NAMED_SET))
+ usage(B_FALSE);
+ else if (who_type == ZFS_DELEG_WHO_UNKNOWN ||
+ who_type == ZFS_DELEG_NAMED_SET)
+ who = (*argv)[0];
+ else if (who_type != ZFS_DELEG_NAMED_SET)
+ perms = (*argv)[0];
+ *ds = (*argv)[1];
+ }
+ if (unallow == B_TRUE) {
+ (*argc)--;
+ (*argv)++;
+ }
+ break;
+
+ case 1:
+ if (unallow == B_FALSE)
+ usage(B_FALSE);
+ if (who == NULL && who_type != ZFS_DELEG_CREATE &&
+ who_type != ZFS_DELEG_EVERYONE)
+ usage(B_FALSE);
+ *ds = (*argv)[0];
+ break;
+
+ default:
+ usage(B_FALSE);
+ }
+
+ if (strrchr(*ds, '@')) {
+ (void) fprintf(stderr,
+ gettext("Can't set or remove 'allow' permissions "
+ "on snapshots.\n"));
+ return (-1);
+ }
+
+ if ((zhp = zfs_open(g_zfs, *ds, ZFS_TYPE_DATASET)) == NULL)
+ return (-1);
+
+ if ((zfs_build_perms(zhp, who, perms,
+ who_type, deleg_type, zperms)) != 0) {
+ zfs_close(zhp);
+ return (-1);
+ }
+ zfs_close(zhp);
+ return (0);
+}
+
+static int
+zfs_do_allow(int argc, char **argv)
+{
+ char *ds;
+ nvlist_t *zperms = NULL;
+ zfs_handle_t *zhp;
+ int unused;
+ int ret;
+
+ if ((ret = parse_allow_args(&argc, &argv, B_FALSE, &ds,
+ &unused, &zperms)) == -1)
+ return (1);
+
+ if (ret == 1)
+ return (zfs_print_allows(argv[0]));
+
+ if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
+ return (1);
+
+ if (zfs_perm_set(zhp, zperms)) {
+ zfs_close(zhp);
+ nvlist_free(zperms);
+ return (1);
+ }
+ nvlist_free(zperms);
+ zfs_close(zhp);
+
+ return (0);
+}
+
+static int
+unallow_callback(zfs_handle_t *zhp, void *data)
+{
+ nvlist_t *nvp = (nvlist_t *)data;
+ int error;
+
+ error = zfs_perm_remove(zhp, nvp);
+ if (error) {
+ (void) fprintf(stderr, gettext("Failed to remove permissions "
+ "on %s\n"), zfs_get_name(zhp));
+ }
+ return (error);
+}
+
+static int
+zfs_do_unallow(int argc, char **argv)
+{
+ int recurse = B_FALSE;
+ char *ds;
+ int error;
+ nvlist_t *zperms = NULL;
+ int flags = 0;
+
+ if (parse_allow_args(&argc, &argv, B_TRUE,
+ &ds, &recurse, &zperms) == -1)
+ return (1);
+
+ if (recurse)
+ flags |= ZFS_ITER_RECURSE;
+ error = zfs_for_each(argc, argv, flags,
+ ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME, NULL,
+ NULL, unallow_callback, (void *)zperms);
+
+ if (zperms)
+ nvlist_free(zperms);
+
+ return (error);
}
typedef struct get_all_cbdata {
@@ -2067,14 +2896,35 @@ typedef struct get_all_cbdata {
size_t cb_alloc;
size_t cb_used;
uint_t cb_types;
+ boolean_t cb_verbose;
} get_all_cbdata_t;
+#define CHECK_SPINNER 30
+#define SPINNER_TIME 3 /* seconds */
+#define MOUNT_TIME 5 /* seconds */
+
static int
get_one_dataset(zfs_handle_t *zhp, void *data)
{
+ static char spin[] = { '-', '\\', '|', '/' };
+ static int spinval = 0;
+ static int spincheck = 0;
+ static time_t last_spin_time = (time_t)0;
get_all_cbdata_t *cbp = data;
zfs_type_t type = zfs_get_type(zhp);
+ if (cbp->cb_verbose) {
+ if (--spincheck < 0) {
+ time_t now = time(NULL);
+ if (last_spin_time + SPINNER_TIME < now) {
+ (void) printf("\b%c", spin[spinval++ % 4]);
+ (void) fflush(stdout);
+ last_spin_time = now;
+ }
+ spincheck = CHECK_SPINNER;
+ }
+ }
+
/*
* Iterate over any nested datasets.
*/
@@ -2117,15 +2967,26 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
}
static void
-get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count)
+get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count,
+ boolean_t verbose)
{
get_all_cbdata_t cb = { 0 };
cb.cb_types = types;
+ cb.cb_verbose = verbose;
+
+ if (verbose) {
+ (void) printf("%s: *", gettext("Reading ZFS config"));
+ (void) fflush(stdout);
+ }
(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
*dslist = cb.cb_handles;
*count = cb.cb_used;
+
+ if (verbose) {
+ (void) printf("\b%s\n", gettext("done."));
+ }
}
static int
@@ -2167,15 +3028,17 @@ dataset_cmp(const void *a, const void *b)
* Share or mount a dataset.
*/
static int
-share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
- const char *options)
+share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
+ boolean_t explicit, const char *options)
{
char mountpoint[ZFS_MAXPROPLEN];
char shareopts[ZFS_MAXPROPLEN];
+ char smbshareopts[ZFS_MAXPROPLEN];
const char *cmdname = op == OP_SHARE ? "share" : "mount";
struct mnttab mnt;
uint64_t zoned, canmount;
zfs_type_t type = zfs_get_type(zhp);
+ boolean_t shared_nfs, shared_smb;
assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME));
@@ -2216,9 +3079,12 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts,
+ sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0);
canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT);
- if (op == OP_SHARE && strcmp(shareopts, "off") == 0) {
+ if (op == OP_SHARE && strcmp(shareopts, "off") == 0 &&
+ strcmp(smbshareopts, "off") == 0) {
if (!explicit)
return (0);
@@ -2240,9 +3106,8 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
(void) fprintf(stderr, gettext("cannot %s '%s': "
"legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
- (void) fprintf(stderr, gettext("use %s to "
- "%s this filesystem\n"), op == OP_SHARE ?
- "share(1M)" : "mount(1M)", cmdname);
+ (void) fprintf(stderr, gettext("use %s(1M) to "
+ "%s this filesystem\n"), cmdname, cmdname);
return (1);
}
@@ -2255,7 +3120,16 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
return (1);
}
- if (!canmount) {
+ /*
+ * canmount explicit outcome
+ * on no pass through
+ * on yes pass through
+ * off no return 0
+ * off yes display error, return 1
+ * noauto no return 0
+ * noauto yes pass through
+ */
+ if (canmount == ZFS_CANMOUNT_OFF) {
if (!explicit)
return (0);
@@ -2263,6 +3137,8 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
"'canmount' property is set to 'off'\n"), cmdname,
zfs_get_name(zhp));
return (1);
+ } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) {
+ return (0);
}
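
The canmount table in the comment above can be read as a small decision function. The sketch below only illustrates that table and is not code from the patch; the enum and function names are made up and merely mirror the ZFS_CANMOUNT_* values.

#include <stdio.h>

typedef enum { CM_ON, CM_OFF, CM_NOAUTO } canmount_t;
typedef enum { SKIP = 0, PROCEED = 1, ERROR = 2 } outcome_t;

/* Encodes the canmount/explicit table from the comment above. */
static outcome_t
canmount_outcome(canmount_t canmount, int is_explicit)
{
	if (canmount == CM_OFF)
		return (is_explicit ? ERROR : SKIP);
	if (canmount == CM_NOAUTO && !is_explicit)
		return (SKIP);
	return (PROCEED);	/* on, or noauto named explicitly */
}

int
main(void)
{
	(void) printf("%d %d %d\n",
	    canmount_outcome(CM_OFF, 1),	/* 2: display error */
	    canmount_outcome(CM_NOAUTO, 0),	/* 0: silently skipped */
	    canmount_outcome(CM_ON, 0));	/* 1: pass through */
	return (0);
}
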
/*
@@ -2274,7 +3150,15 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
*/
switch (op) {
case OP_SHARE:
- if (zfs_is_shared_nfs(zhp, NULL)) {
+
+ shared_nfs = zfs_is_shared_nfs(zhp, NULL);
+ shared_smb = zfs_is_shared_smb(zhp, NULL);
+
+ if (shared_nfs && shared_smb ||
+ (shared_nfs && strcmp(shareopts, "on") == 0 &&
+ strcmp(smbshareopts, "off") == 0) ||
+ (shared_smb && strcmp(smbshareopts, "on") == 0 &&
+ strcmp(shareopts, "off") == 0)) {
if (!explicit)
return (0);
@@ -2288,8 +3172,23 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
zfs_mount(zhp, NULL, 0) != 0)
return (1);
- if (zfs_share_nfs(zhp) != 0)
+ if (protocol == NULL) {
+ if (zfs_shareall(zhp) != 0)
+ return (1);
+ } else if (strcmp(protocol, "nfs") == 0) {
+ if (zfs_share_nfs(zhp))
+ return (1);
+ } else if (strcmp(protocol, "smb") == 0) {
+ if (zfs_share_smb(zhp))
+ return (1);
+ } else {
+ (void) fprintf(stderr, gettext("cannot share "
+ "'%s': invalid share type '%s' "
+ "specified\n"),
+ zfs_get_name(zhp), protocol);
return (1);
+ }
+
break;
case OP_MOUNT:
@@ -2352,24 +3251,93 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, boolean_t explicit,
return (0);
}
+/*
+ * Reports progress in the form "(current/total)". Not thread-safe.
+ */
+static void
+report_mount_progress(int current, int total)
+{
+ static int len;
+ static char *reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"
+ "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
+ static time_t last_progress_time;
+ time_t now = time(NULL);
+
+ /* report 1..n instead of 0..n-1 */
+ ++current;
+
+ /* display header if we're here for the first time */
+ if (current == 1) {
+ (void) printf(gettext("Mounting ZFS filesystems: "));
+ len = 0;
+ } else if (current != total && last_progress_time + MOUNT_TIME >= now) {
+ /* too soon to report again */
+ return;
+ }
+
+ last_progress_time = now;
+
+ /* back up to prepare for overwriting */
+ if (len)
+ (void) printf("%*.*s", len, len, reverse);
+
+ /* We put a newline at the end if this is the last one. */
+ len = printf("(%d/%d)%s", current, total, current == total ? "\n" : "");
+ (void) fflush(stdout);
+}
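
A small self-contained sketch of the backspace-overwrite trick used by report_mount_progress() above: remember the length of the last "(current/total)" string, back over it with '\b' characters, then print the new one in its place. The loop and counts here are invented for the demo.

#include <stdio.h>

int
main(void)
{
	static const char *reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
	int len = 0;
	int total = 5;

	(void) printf("Progress: ");
	for (int i = 1; i <= total; i++) {
		/* back over the previous "(i/total)" before printing the next */
		if (len)
			(void) printf("%*.*s", len, len, reverse);
		len = printf("(%d/%d)%s", i, total, i == total ? "\n" : "");
		(void) fflush(stdout);
	}
	return (0);
}
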
+
+static void
+append_options(char *mntopts, char *newopts)
+{
+ int len = strlen(mntopts);
+
+ /* original length plus new string to append plus 1 for the comma */
+ if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) {
+ (void) fprintf(stderr, gettext("the opts argument for "
+ "'%c' option is too long (more than %d chars)\n"),
+ "-o", MNT_LINE_MAX);
+ usage(B_FALSE);
+ }
+
+ if (*mntopts)
+ mntopts[len++] = ',';
+
+ (void) strcpy(&mntopts[len], newopts);
+}
+
static int
share_mount(int op, int argc, char **argv)
{
int do_all = 0;
+ boolean_t verbose = B_FALSE;
int c, ret = 0;
- const char *options = NULL;
+ char *options = NULL;
int types, flags = 0;
/* check options */
- while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ao:O" : "a"))
+ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a"))
!= -1) {
switch (c) {
case 'a':
do_all = 1;
break;
+ case 'v':
+ verbose = B_TRUE;
+ break;
case 'o':
- options = optarg;
+ if (*optarg == '\0') {
+ (void) fprintf(stderr, gettext("empty mount "
+ "options (-o) specified\n"));
+ usage(B_FALSE);
+ }
+
+ if (options == NULL)
+ options = safe_malloc(MNT_LINE_MAX + 1);
+
+ /* option validation is done later */
+ append_options(options, optarg);
break;
+
case 'O':
warnx("no overlay mounts support on FreeBSD, ignoring");
break;
@@ -2392,20 +3360,22 @@ share_mount(int op, int argc, char **argv)
if (do_all) {
zfs_handle_t **dslist = NULL;
size_t i, count = 0;
+ char *protocol = NULL;
if (op == OP_MOUNT) {
types = ZFS_TYPE_FILESYSTEM;
} else if (argc > 0) {
- if (strcmp(argv[0], "nfs") == 0) {
+ if (strcmp(argv[0], "nfs") == 0 ||
+ strcmp(argv[0], "smb") == 0) {
types = ZFS_TYPE_FILESYSTEM;
} else if (strcmp(argv[0], "iscsi") == 0) {
types = ZFS_TYPE_VOLUME;
} else {
(void) fprintf(stderr, gettext("share type "
- "must be 'nfs' or 'iscsi'\n"));
+ "must be 'nfs', 'smb' or 'iscsi'\n"));
usage(B_FALSE);
}
-
+ protocol = argv[0];
argc--;
argv++;
} else {
@@ -2417,7 +3387,7 @@ share_mount(int op, int argc, char **argv)
usage(B_FALSE);
}
- get_all_datasets(types, &dslist, &count);
+ get_all_datasets(types, &dslist, &count, verbose);
if (count == 0)
return (0);
@@ -2425,8 +3395,11 @@ share_mount(int op, int argc, char **argv)
qsort(dslist, count, sizeof (void *), dataset_cmp);
for (i = 0; i < count; i++) {
- if (share_mount_one(dslist[i], op, flags, B_FALSE,
- options) != 0)
+ if (verbose)
+ report_mount_progress(i, count);
+
+ if (share_mount_one(dslist[i], op, flags, protocol,
+ B_FALSE, options) != 0)
ret = 1;
zfs_close(dslist[i]);
}
@@ -2436,9 +3409,9 @@ share_mount(int op, int argc, char **argv)
struct statfs *sfs;
int i, n;
- if (op == OP_SHARE) {
+ if ((op == OP_SHARE) || (options != NULL)) {
(void) fprintf(stderr, gettext("missing filesystem "
- "argument\n"));
+ "argument (specify -a for all)\n"));
usage(B_FALSE);
}
@@ -2476,7 +3449,7 @@ share_mount(int op, int argc, char **argv)
if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) {
ret = 1;
} else {
- ret = share_mount_one(zhp, op, flags, B_TRUE,
+ ret = share_mount_one(zhp, op, flags, NULL, B_TRUE,
options);
zfs_close(zhp);
}
@@ -2498,7 +3471,7 @@ zfs_do_mount(int argc, char **argv)
}
/*
- * zfs share -a [nfs | iscsi]
+ * zfs share -a [nfs | iscsi | smb]
* zfs share filesystem
*
* Share all filesystems, or share the given filesystem.
@@ -2535,9 +3508,23 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
{
zfs_handle_t *zhp;
int ret;
+ struct stat64 statbuf;
struct mnttab search = { 0 }, entry;
const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount";
- char property[ZFS_MAXPROPLEN];
+ ino_t path_inode;
+
+ /*
+ * Search for the path in /etc/mnttab. Rather than looking for the
+ * specific path, which can be fooled by non-standard paths (i.e. ".."
+ * or "//"), we stat() the path and search for the corresponding
+ * (major,minor) device pair.
+ */
+ if (stat64(path, &statbuf) != 0) {
+ (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"),
+ cmdname, path, strerror(errno));
+ return (1);
+ }
+ path_inode = statbuf.st_ino;
/*
* Search for the given (major,minor) pair in the mount table.
@@ -2545,9 +3532,17 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
search.mnt_mountp = path;
rewind(mnttab_file);
if (getmntany(mnttab_file, &entry, &search) != 0) {
- (void) fprintf(stderr, gettext("cannot %s '%s': not "
- "currently mounted\n"), cmdname, path);
- return (1);
+ if (op == OP_SHARE) {
+ (void) fprintf(stderr, gettext("cannot %s '%s': not "
+ "currently mounted\n"), cmdname, path);
+ return (1);
+ }
+ (void) fprintf(stderr, gettext("warning: %s not in mnttab\n"),
+ path);
+ if ((ret = umount2(path, flags)) != 0)
+ (void) fprintf(stderr, gettext("%s: %s\n"), path,
+ strerror(errno));
+ return (ret != 0);
}
if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) {
@@ -2560,39 +3555,58 @@ unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual)
ZFS_TYPE_FILESYSTEM)) == NULL)
return (1);
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+ ret = 1;
+ if (stat64(entry.mnt_mountp, &statbuf) != 0) {
+ (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"),
+ cmdname, path, strerror(errno));
+ goto out;
+ } else if (statbuf.st_ino != path_inode) {
+ (void) fprintf(stderr, gettext("cannot "
+ "%s '%s': not a mountpoint\n"), cmdname, path);
+ goto out;
+ }
if (op == OP_SHARE) {
- if (strcmp(property, "off") == 0) {
+ char nfs_mnt_prop[ZFS_MAXPROPLEN];
+ char smbshare_prop[ZFS_MAXPROPLEN];
+
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop,
+ sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop,
+ sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0);
+
+ if (strcmp(nfs_mnt_prop, "off") == 0 &&
+ strcmp(smbshare_prop, "off") == 0) {
(void) fprintf(stderr, gettext("cannot unshare "
"'%s': legacy share\n"), path);
(void) fprintf(stderr, gettext("use "
"unshare(1M) to unshare this filesystem\n"));
- ret = 1;
- } else if (!zfs_is_shared_nfs(zhp, NULL)) {
+ } else if (!zfs_is_shared(zhp)) {
(void) fprintf(stderr, gettext("cannot unshare '%s': "
"not currently shared\n"), path);
- ret = 1;
} else {
- ret = zfs_unshareall_nfs(zhp);
+ ret = zfs_unshareall_bypath(zhp, path);
}
} else {
+ char mtpt_prop[ZFS_MAXPROPLEN];
+
+ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop,
+ sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0);
+
if (is_manual) {
ret = zfs_unmount(zhp, NULL, flags);
- } else if (strcmp(property, "legacy") == 0) {
+ } else if (strcmp(mtpt_prop, "legacy") == 0) {
(void) fprintf(stderr, gettext("cannot unmount "
"'%s': legacy mountpoint\n"),
zfs_get_name(zhp));
(void) fprintf(stderr, gettext("use umount(1M) "
"to unmount this filesystem\n"));
- ret = 1;
} else {
ret = zfs_unmountall(zhp, flags);
}
}
+out:
zfs_close(zhp);
return (ret != 0);
@@ -2609,7 +3623,8 @@ unshare_unmount(int op, int argc, char **argv)
int ret = 0;
int types, c;
zfs_handle_t *zhp;
- char property[ZFS_MAXPROPLEN];
+ char nfsiscsi_mnt_prop[ZFS_MAXPROPLEN];
+ char sharesmb[ZFS_MAXPROPLEN];
/* check options */
while ((c = getopt(argc, argv, op == OP_SHARE ? "a" : "af")) != -1) {
@@ -2695,18 +3710,35 @@ unshare_unmount(int op, int argc, char **argv)
continue;
}
- verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
- property, sizeof (property), NULL, NULL,
- 0, B_FALSE) == 0);
-
- /* Ignore legacy mounts and shares */
- if ((op == OP_SHARE &&
- strcmp(property, "off") == 0) ||
- (op == OP_MOUNT &&
- strcmp(property, "legacy") == 0)) {
- zfs_close(zhp);
- continue;
+ switch (op) {
+ case OP_SHARE:
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
+ nfsiscsi_mnt_prop,
+ sizeof (nfsiscsi_mnt_prop),
+ NULL, NULL, 0, B_FALSE) == 0);
+ if (strcmp(nfsiscsi_mnt_prop, "off") != 0)
+ break;
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
+ nfsiscsi_mnt_prop,
+ sizeof (nfsiscsi_mnt_prop),
+ NULL, NULL, 0, B_FALSE) == 0);
+ if (strcmp(nfsiscsi_mnt_prop, "off") == 0)
+ continue;
+ break;
+ case OP_MOUNT:
+ /* Ignore legacy mounts */
+ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
+ nfsiscsi_mnt_prop,
+ sizeof (nfsiscsi_mnt_prop),
+ NULL, NULL, 0, B_FALSE) == 0);
+ if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0)
+ continue;
+ /* Ignore canmount=noauto mounts */
+ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) ==
+ ZFS_CANMOUNT_NOAUTO)
+ continue;
+ default:
+ break;
}
node = safe_malloc(sizeof (unshare_unmount_node_t));
@@ -2746,7 +3778,7 @@ unshare_unmount(int op, int argc, char **argv)
switch (op) {
case OP_SHARE:
- if (zfs_unshare_nfs(node->un_zhp,
+ if (zfs_unshareall_bypath(node->un_zhp,
node->un_mountp) != 0)
ret = 1;
break;
@@ -2774,7 +3806,8 @@ unshare_unmount(int op, int argc, char **argv)
zfs_handle_t **dslist = NULL;
size_t i, count = 0;
- get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count);
+ get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count,
+ B_FALSE);
if (count != 0) {
qsort(dslist, count, sizeof (void *),
@@ -2819,12 +3852,22 @@ unshare_unmount(int op, int argc, char **argv)
if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
verify(zfs_prop_get(zhp, op == OP_SHARE ?
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+ ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
+ nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop), NULL,
+ NULL, 0, B_FALSE) == 0);
switch (op) {
case OP_SHARE:
- if (strcmp(property, "off") == 0) {
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS,
+ nfsiscsi_mnt_prop,
+ sizeof (nfsiscsi_mnt_prop),
+ NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB,
+ sharesmb, sizeof (sharesmb), NULL, NULL,
+ 0, B_FALSE) == 0);
+
+ if (strcmp(nfsiscsi_mnt_prop, "off") == 0 &&
+ strcmp(sharesmb, "off") == 0) {
(void) fprintf(stderr, gettext("cannot "
"unshare '%s': legacy share\n"),
zfs_get_name(zhp));
@@ -2832,18 +3875,18 @@ unshare_unmount(int op, int argc, char **argv)
"unshare(1M) to unshare this "
"filesystem\n"));
ret = 1;
- } else if (!zfs_is_shared_nfs(zhp, NULL)) {
+ } else if (!zfs_is_shared(zhp)) {
(void) fprintf(stderr, gettext("cannot "
"unshare '%s': not currently "
"shared\n"), zfs_get_name(zhp));
ret = 1;
- } else if (zfs_unshareall_nfs(zhp) != 0) {
+ } else if (zfs_unshareall(zhp) != 0) {
ret = 1;
}
break;
case OP_MOUNT:
- if (strcmp(property, "legacy") == 0) {
+ if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0) {
(void) fprintf(stderr, gettext("cannot "
"unmount '%s': legacy "
"mountpoint\n"), zfs_get_name(zhp));
@@ -2865,10 +3908,11 @@ unshare_unmount(int op, int argc, char **argv)
} else {
assert(op == OP_SHARE);
- verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, property,
- sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
+ verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI,
+ nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop),
+ NULL, NULL, 0, B_FALSE) == 0);
- if (strcmp(property, "off") == 0) {
+ if (strcmp(nfsiscsi_mnt_prop, "off") == 0) {
(void) fprintf(stderr, gettext("cannot unshare "
"'%s': 'shareiscsi' property not set\n"),
zfs_get_name(zhp));
@@ -2948,9 +3992,6 @@ do_jail(int argc, char **argv, int attach)
ret = (zfs_jail(zhp, jailid, attach) != 0);
- if (!ret)
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
-
zfs_close(zhp);
return (ret);
}
@@ -3141,6 +4182,23 @@ do_volcheck(boolean_t isinit)
return (zpool_iter(g_zfs, volcheck, &isinit) ? 1 : 0);
}
+static int
+find_command_idx(char *command, int *idx)
+{
+ int i;
+
+ for (i = 0; i < NCOMMAND; i++) {
+ if (command_table[i].name == NULL)
+ continue;
+
+ if (strcmp(command, command_table[i].name) == 0) {
+ *idx = i;
+ return (0);
+ }
+ }
+ return (1);
+}
+
int
main(int argc, char **argv)
{
@@ -3160,6 +4218,9 @@ main(int argc, char **argv)
return (1);
}
+ zpool_set_history_str("zfs", argc, argv, history_str);
+ verify(zpool_stage_history(g_zfs, history_str) == 0);
+
libzfs_print_on_error(g_zfs, B_TRUE);
if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
@@ -3218,18 +4279,14 @@ main(int argc, char **argv)
/*
* Run the appropriate command.
*/
- for (i = 0; i < NCOMMAND; i++) {
- if (command_table[i].name == NULL)
- continue;
-
- if (strcmp(cmdname, command_table[i].name) == 0) {
- current_command = &command_table[i];
- ret = command_table[i].func(argc - 1, argv + 1);
- break;
- }
- }
-
- if (i == NCOMMAND) {
+ if (find_command_idx(cmdname, &i) == 0) {
+ current_command = &command_table[i];
+ ret = command_table[i].func(argc - 1, argv + 1);
+ } else if (strchr(cmdname, '=') != NULL) {
+ verify(find_command_idx("set", &i) == 0);
+ current_command = &command_table[i];
+ ret = command_table[i].func(argc, argv);
+ } else {
(void) fprintf(stderr, gettext("unrecognized "
"command '%s'\n"), cmdname);
usage(B_FALSE);
diff --git a/cddl/contrib/opensolaris/cmd/zinject/translate.c b/cddl/contrib/opensolaris/cmd/zinject/translate.c
new file mode 100644
index 0000000..da26cd6
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zinject/translate.c
@@ -0,0 +1,460 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <libzfs.h>
+
+#undef verify /* both libzfs.h and zfs_context.h want to define this */
+
+#include <sys/zfs_context.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/file.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/vdev_impl.h>
+
+#include "zinject.h"
+
+#include <assert.h>
+#define verify assert
+
+extern void kernel_init(int);
+extern void kernel_fini(void);
+
+static int debug;
+
+static void
+ziprintf(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!debug)
+ return;
+
+ va_start(ap, fmt);
+ (void) vprintf(fmt, ap);
+ va_end(ap);
+}
+
+/*
+ * Given a full path to a file, translate into a dataset name and a relative
+ * path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
+ * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64
+ * buffer, which we need later to get the object ID.
+ */
+static int
+parse_pathname(const char *fullpath, char *dataset, char *relpath,
+ struct stat64 *statbuf)
+{
+ struct statfs sfs;
+ const char *rel;
+
+ if (fullpath[0] != '/') {
+ (void) fprintf(stderr, "invalid object '%s': must be full "
+ "path\n", fullpath);
+ usage();
+ return (-1);
+ }
+
+ if (strlen(fullpath) >= MAXPATHLEN) {
+ (void) fprintf(stderr, "invalid object; pathname too long\n");
+ return (-1);
+ }
+
+ if (stat64(fullpath, statbuf) != 0) {
+ (void) fprintf(stderr, "cannot open '%s': %s\n",
+ fullpath, strerror(errno));
+ return (-1);
+ }
+
+ if (statfs(fullpath, &sfs) == -1) {
+ (void) fprintf(stderr, "cannot find mountpoint for '%s': %s\n",
+ fullpath, strerror(errno));
+ return (-1);
+ }
+
+ if (strcmp(sfs.f_fstypename, MNTTYPE_ZFS) != 0) {
+ (void) fprintf(stderr, "invalid path '%s': not a ZFS "
+ "filesystem\n", fullpath);
+ return (-1);
+ }
+
+ if (strncmp(fullpath, sfs.f_mntonname, strlen(sfs.f_mntonname)) != 0) {
+ (void) fprintf(stderr, "invalid path '%s': mountpoint "
+ "doesn't match path\n", fullpath);
+ return (-1);
+ }
+
+ (void) strcpy(dataset, sfs.f_mntfromname);
+
+ rel = fullpath + strlen(sfs.f_mntonname);
+ if (rel[0] == '/')
+ rel++;
+ (void) strcpy(relpath, rel);
+
+ return (0);
+}
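
A minimal sketch (assuming FreeBSD statfs(2) and an invented path) of the split parse_pathname() performs: f_fstypename confirms the file lives on ZFS, f_mntfromname is the dataset, and whatever lies beyond f_mntonname is the path relative to the dataset.

#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <sys/mount.h>

int
main(void)
{
	const char *fullpath = "/tank/fs/dir/file.txt";	/* example path */
	struct statfs sfs;
	const char *rel;

	if (statfs(fullpath, &sfs) == -1) {
		perror("statfs");
		return (1);
	}
	if (strcmp(sfs.f_fstypename, "zfs") != 0) {
		(void) fprintf(stderr, "%s: not on ZFS\n", fullpath);
		return (1);
	}
	/* dataset name and path relative to its mountpoint */
	rel = fullpath + strlen(sfs.f_mntonname);
	if (*rel == '/')
		rel++;
	(void) printf("dataset: %s\nrelpath: %s\n", sfs.f_mntfromname, rel);
	return (0);
}
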
+
+/*
+ * Convert from a (dataset, path) pair into a (objset, object) pair. Note that
+ * we grab the object number from the inode number, since looking this up via
+ * libzpool is a real pain.
+ */
+/* ARGSUSED */
+static int
+object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
+ zinject_record_t *record)
+{
+ objset_t *os;
+ int err;
+
+ /*
+ * Before doing any libzpool operations, call sync() to ensure that the
+ * on-disk state is consistent with the in-core state.
+ */
+ sync();
+
+ if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
+ DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+ dataset, strerror(err));
+ return (-1);
+ }
+
+ record->zi_objset = dmu_objset_id(os);
+ record->zi_object = statbuf->st_ino;
+
+ dmu_objset_close(os);
+
+ return (0);
+}
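
The shortcut the comment above relies on, shown as a tiny hedged example: on a ZFS filesystem the inode number returned by stat(2) is the object number, so no libzpool lookup is needed for it. The path is an assumption for the example.

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

int
main(void)
{
	struct stat sb;

	if (stat("/tank/fs/file.txt", &sb) != 0) {
		perror("stat");
		return (1);
	}
	/* On ZFS, st_ino is the object number used in record->zi_object. */
	(void) printf("object: %llu\n", (unsigned long long)sb.st_ino);
	return (0);
}
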
+
+/*
+ * Calculate the real range based on the type, level, and range given.
+ */
+static int
+calculate_range(const char *dataset, err_type_t type, int level, char *range,
+ zinject_record_t *record)
+{
+ objset_t *os = NULL;
+ dnode_t *dn = NULL;
+ int err;
+ int ret = -1;
+
+ /*
+ * Determine the numeric range from the string.
+ */
+ if (range == NULL) {
+ /*
+ * If range is unspecified, set the range to [0,-1], which
+ * indicates that the whole object should be treated as an
+ * error.
+ */
+ record->zi_start = 0;
+ record->zi_end = -1ULL;
+ } else {
+ char *end;
+
+ /* XXX add support for suffixes */
+ record->zi_start = strtoull(range, &end, 10);
+
+
+ if (*end == '\0')
+ record->zi_end = record->zi_start + 1;
+ else if (*end == ',')
+ record->zi_end = strtoull(end + 1, &end, 10);
+
+ if (*end != '\0') {
+ (void) fprintf(stderr, "invalid range '%s': must be "
+ "a numeric range of the form 'start[,end]'\n",
+ range);
+ goto out;
+ }
+ }
+
+ switch (type) {
+ case TYPE_DATA:
+ break;
+
+ case TYPE_DNODE:
+ /*
+ * If this is a request to inject faults into the dnode, then we
+ * must translate the current (objset,object) pair into an
+ * offset within the metadnode for the objset. Specifying any
+ * kind of range with type 'dnode' is illegal.
+ */
+ if (range != NULL) {
+ (void) fprintf(stderr, "range cannot be specified when "
+ "type is 'dnode'\n");
+ goto out;
+ }
+
+ record->zi_start = record->zi_object * sizeof (dnode_phys_t);
+ record->zi_end = record->zi_start + sizeof (dnode_phys_t);
+ record->zi_object = 0;
+ break;
+ }
+
+ /*
+ * Get the dnode associated with object, so we can calculate the block
+ * size.
+ */
+ if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
+ DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+ (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
+ dataset, strerror(err));
+ goto out;
+ }
+
+ if (record->zi_object == 0) {
+ dn = os->os->os_meta_dnode;
+ } else {
+ err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+ if (err != 0) {
+ (void) fprintf(stderr, "failed to hold dnode "
+ "for object %llu\n",
+ (u_longlong_t)record->zi_object);
+ goto out;
+ }
+ }
+
+
+ ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
+ ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
+
+ /*
+ * Translate range into block IDs.
+ */
+ if (record->zi_start != 0 || record->zi_end != -1ULL) {
+ record->zi_start >>= dn->dn_datablkshift;
+ record->zi_end >>= dn->dn_datablkshift;
+ }
+
+ /*
+ * Check level, and then translate level 0 blkids into ranges
+ * appropriate for level of indirection.
+ */
+ record->zi_level = level;
+ if (level > 0) {
+ ziprintf("level 0 blkid range: [%llu, %llu]\n",
+ record->zi_start, record->zi_end);
+
+ if (level >= dn->dn_nlevels) {
+ (void) fprintf(stderr, "level %d exceeds max level "
+ "of object (%d)\n", level, dn->dn_nlevels - 1);
+ goto out;
+ }
+
+ if (record->zi_start != 0 || record->zi_end != 0) {
+ int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+ for (; level > 0; level--) {
+ record->zi_start >>= shift;
+ record->zi_end >>= shift;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ if (dn) {
+ if (dn != os->os->os_meta_dnode)
+ dnode_rele(dn, FTAG);
+ }
+ if (os)
+ dmu_objset_close(os);
+
+ return (ret);
+}
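
A worked example of the two translations calculate_range() performs, using assumed shifts rather than values read from a real dnode (128K data blocks, 16K indirect blocks, and the usual 128-byte block pointer): byte offsets are shifted down to level-0 block ids, and each extra level of indirection shifts by indblkshift - SPA_BLKPTRSHIFT.

#include <stdio.h>
#include <inttypes.h>

#define	SPA_BLKPTRSHIFT	7		/* 128-byte block pointers */

int
main(void)
{
	int datablkshift = 17;		/* assumed 128K data blocks */
	int indblkshift = 14;		/* assumed 16K indirect blocks */
	uint64_t start = 0, end = 1048576;	/* byte range [0, 1M] */
	int level = 1;

	/* bytes -> level-0 block ids */
	start >>= datablkshift;
	end >>= datablkshift;
	(void) printf("level 0 blkids: [%" PRIu64 ", %" PRIu64 "]\n",
	    start, end);

	/* level-0 ids -> ids at the requested level of indirection */
	for (int l = level; l > 0; l--) {
		start >>= (indblkshift - SPA_BLKPTRSHIFT);
		end >>= (indblkshift - SPA_BLKPTRSHIFT);
	}
	(void) printf("level %d blkids: [%" PRIu64 ", %" PRIu64 "]\n",
	    level, start, end);
	return (0);
}
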
+
+int
+translate_record(err_type_t type, const char *object, const char *range,
+ int level, zinject_record_t *record, char *poolname, char *dataset)
+{
+ char path[MAXPATHLEN];
+ char *slash;
+ struct stat64 statbuf;
+ int ret = -1;
+
+ kernel_init(FREAD);
+
+ debug = (getenv("ZINJECT_DEBUG") != NULL);
+
+ ziprintf("translating: %s\n", object);
+
+ if (MOS_TYPE(type)) {
+ /*
+ * MOS objects are treated specially.
+ */
+ switch (type) {
+ case TYPE_MOS:
+ record->zi_type = 0;
+ break;
+ case TYPE_MOSDIR:
+ record->zi_type = DMU_OT_OBJECT_DIRECTORY;
+ break;
+ case TYPE_METASLAB:
+ record->zi_type = DMU_OT_OBJECT_ARRAY;
+ break;
+ case TYPE_CONFIG:
+ record->zi_type = DMU_OT_PACKED_NVLIST;
+ break;
+ case TYPE_BPLIST:
+ record->zi_type = DMU_OT_BPLIST;
+ break;
+ case TYPE_SPACEMAP:
+ record->zi_type = DMU_OT_SPACE_MAP;
+ break;
+ case TYPE_ERRLOG:
+ record->zi_type = DMU_OT_ERROR_LOG;
+ break;
+ }
+
+ dataset[0] = '\0';
+ (void) strcpy(poolname, object);
+ return (0);
+ }
+
+ /*
+ * Convert a full path into a (dataset, file) pair.
+ */
+ if (parse_pathname(object, dataset, path, &statbuf) != 0)
+ goto err;
+
+ ziprintf(" dataset: %s\n", dataset);
+ ziprintf(" path: %s\n", path);
+
+ /*
+ * Convert (dataset, file) into (objset, object)
+ */
+ if (object_from_path(dataset, path, &statbuf, record) != 0)
+ goto err;
+
+ ziprintf("raw objset: %llu\n", record->zi_objset);
+ ziprintf("raw object: %llu\n", record->zi_object);
+
+ /*
+ * For the given object, calculate the real (type, level, range)
+ */
+ if (calculate_range(dataset, type, level, (char *)range, record) != 0)
+ goto err;
+
+ ziprintf(" objset: %llu\n", record->zi_objset);
+ ziprintf(" object: %llu\n", record->zi_object);
+ if (record->zi_start == 0 &&
+ record->zi_end == -1ULL)
+ ziprintf(" range: all\n");
+ else
+ ziprintf(" range: [%llu, %llu]\n", record->zi_start,
+ record->zi_end);
+
+ /*
+ * Copy the pool name
+ */
+ (void) strcpy(poolname, dataset);
+ if ((slash = strchr(poolname, '/')) != NULL)
+ *slash = '\0';
+
+ ret = 0;
+
+err:
+ kernel_fini();
+ return (ret);
+}
+
+int
+translate_raw(const char *str, zinject_record_t *record)
+{
+ /*
+ * A raw bookmark of the form objset:object:level:blkid, where each
+ * number is a hexadecimal value.
+ */
+ if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
+ (u_longlong_t *)&record->zi_object, &record->zi_level,
+ (u_longlong_t *)&record->zi_start) != 4) {
+ (void) fprintf(stderr, "bad raw spec '%s': must be of the form "
+ "'objset:object:level:blkid'\n", str);
+ return (-1);
+ }
+
+ record->zi_end = record->zi_start;
+
+ return (0);
+}
+
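As an aside (not part of the change), the sketch below shows how such a bookmark string is pulled apart; the bookmark value is made up for the example. Note that translate_raw() then copies zi_start into zi_end, so exactly one block is targeted.

#include <stdio.h>

int
main(void)
{
	unsigned long long objset, object, blkid;
	unsigned int level;

	/* Hypothetical bookmark: objset 0x15, object 0x8, level 0, blkid 0x1a. */
	if (sscanf("15:8:0:1a", "%llx:%llx:%x:%llx",
	    &objset, &object, &level, &blkid) != 4)
		return (1);

	(void) printf("objset=%llu object=%llu level=%u blkid=%llu\n",
	    objset, object, level, blkid);
	return (0);
}
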
+int
+translate_device(const char *pool, const char *device, err_type_t label_type,
+ zinject_record_t *record)
+{
+ char *end;
+ zpool_handle_t *zhp;
+ nvlist_t *tgt;
+ boolean_t isspare, iscache;
+
+ /*
+ * Given a device name or GUID, create an appropriate injection record
+ * with zi_guid set.
+ */
+ if ((zhp = zpool_open(g_zfs, pool)) == NULL)
+ return (-1);
+
+ record->zi_guid = strtoull(device, &end, 16);
+ if (record->zi_guid == 0 || *end != '\0') {
+ tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL);
+
+ if (tgt == NULL) {
+ (void) fprintf(stderr, "cannot find device '%s' in "
+ "pool '%s'\n", device, pool);
+ return (-1);
+ }
+
+ verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+ &record->zi_guid) == 0);
+ }
+
+ switch (label_type) {
+ case TYPE_LABEL_UBERBLOCK:
+ record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
+ record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
+ break;
+ case TYPE_LABEL_NVLIST:
+ record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
+ record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
+ break;
+ }
+ return (0);
+}
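The device operand above is first tried as a hexadecimal GUID and only falls back to a name lookup via zpool_find_vdev() when that parse fails. A standalone sketch of the strtoull() test follows; both operand strings are hypothetical.

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

int
main(void)
{
	char *end;
	uint64_t guid;

	/* A hypothetical GUID operand parses fully as hex and is used as-is. */
	guid = strtoull("7a300e1208f6e2b5", &end, 16);
	(void) printf("guid operand -> %" PRIu64 ", leftover \"%s\"\n",
	    guid, end);

	/*
	 * A device name stops parsing at the first non-hex character, so the
	 * caller falls back to looking it up by name in the pool config.
	 */
	guid = strtoull("c1t0d0", &end, 16);
	(void) printf("name operand -> %" PRIu64 ", leftover \"%s\"\n",
	    guid, end);
	return (0);
}
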
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
new file mode 100644
index 0000000..2302dc4
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
@@ -0,0 +1,770 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * ZFS Fault Injector
+ *
+ * This userland component takes a set of options and uses libzpool to translate
+ * from a user-visible object type and name to an internal representation.
+ * There are two basic types of faults: device faults and data faults.
+ *
+ *
+ * DEVICE FAULTS
+ *
+ * Errors can be injected into a particular vdev using the '-d' option. This
+ * option takes a path or vdev GUID to uniquely identify the device within a
+ * pool. There are two types of errors that can be injected, EIO and ENXIO,
+ * that can be controlled through the '-e' option. The default is ENXIO. For
+ * EIO failures, any attempt to read data from the device will return EIO, but
+ * a subsequent attempt to reopen the device will succeed. For ENXIO failures,
+ * any attempt to read from the device will return EIO, but any attempt to
+ * reopen the device will also return ENXIO.
+ * For label faults, the -L option must be specified. This allows faults
+ * to be injected into either the nvlist or uberblock region of all the labels
+ * for the specified device.
+ *
+ * This form of the command looks like:
+ *
+ * zinject -d device [-e errno] [-L <uber | nvlist>] pool
+ *
+ *
+ * DATA FAULTS
+ *
+ * We begin with a tuple of the form:
+ *
+ * <type,level,range,object>
+ *
+ * type A string describing the type of data to target. Each type
+ * implicitly describes how to interpret 'object'. Currently,
+ * the following values are supported:
+ *
+ * data User data for a file
+ * dnode Dnode for a file or directory
+ *
+ * The following MOS objects are special. Instead of injecting
+ * errors on a particular object or blkid, we inject errors across
+ * all objects of the given type.
+ *
+ * mos Any data in the MOS
+ * mosdir object directory
+ * config pool configuration
+ * bplist blkptr list
+ * spacemap spacemap
+ * metaslab metaslab
+ * errlog persistent error log
+ *
+ * level Object level. Defaults to '0', not applicable to all types. If
+ * a range is given, this corresponds to the indirect block
+ * corresponding to the specific range.
+ *
+ * range A numerical range [start,end) within the object. Defaults to
+ * the full size of the file.
+ *
+ * object A string describing the logical location of the object. For
+ * files and directories (currently the only supported types),
+ * this is the path of the object on disk.
+ *
+ * This is translated, via libzpool, into the following internal representation:
+ *
+ * <type,objset,object,level,range>
+ *
+ * These types should be self-explanatory. This tuple is then passed to the
+ * kernel via a special ioctl() to initiate fault injection for the given
+ * object. Note that 'type' is not strictly necessary for fault injection, but
+ * is used when translating existing faults into a human-readable string.
+ *
+ *
+ * The command itself takes one of the forms:
+ *
+ * zinject
+ * zinject <-a | -u pool>
+ * zinject -c <id|all>
+ * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
+ * [-r range] <object>
+ * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
+ *
+ * With no arguments, the command prints all currently registered injection
+ * handlers, with their numeric identifiers.
+ *
+ * The '-c' option will clear the given handler, or all handlers if 'all' is
+ * specified.
+ *
+ * The '-e' option takes a string describing the errno to simulate. This must
+ * be either 'io' or 'checksum'. In most cases this will result in the same
+ * behavior, but RAID-Z will produce a different set of ereports for this
+ * situation.
+ *
+ * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
+ * specified, then the ARC cache is flushed appropriately. If '-u' is
+ * specified, then the underlying SPA is unloaded. Either of these flags can be
+ * specified independently of any other handlers. The '-m' flag automatically
+ * does an unmount and remount of the underlying dataset to aid in flushing the
+ * cache.
+ *
+ * The '-f' flag controls the frequency of errors injected, expressed as an
+ * integer percentage between 1 and 100. The default is 100.
+ *
+ * This form is responsible for actually injecting the handler into the
+ * framework. It takes the arguments described above, translates them to the
+ * internal tuple using libzpool, and then issues an ioctl() to register the
+ * handler.
+ *
+ * The final form can target a specific bookmark, regardless of whether a
+ * human-readable interface has been designed. It allows developers to specify
+ * a particular block by number.
+ */
+
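To make the translation concrete, the following sketch (not part of the original file) shows roughly what a zinject_record_t could hold after translating a hypothetical request such as <data, level 0, range "0,131072", /tank/fs/db>, assuming 128K data blocks; every numeric value is invented for the example.

#include <errno.h>
#include <sys/zfs_ioctl.h>

/* Hypothetical translated record; every value below is illustrative only. */
static zinject_record_t example_record = {
	.zi_objset = 21,	/* objset id of the dataset holding the file */
	.zi_object = 8,		/* object (inode) number of the file */
	.zi_level = 0,		/* inject at the data block level */
	.zi_start = 0,		/* 0 >> 17: first blkid of the range */
	.zi_end = 1,		/* 131072 >> 17: end of the blkid range */
	.zi_error = EIO,	/* errno to report on access */
	.zi_freq = 100,		/* '-f 100': inject on 100% of accesses */
};
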
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/fs/zfs.h>
+#include <sys/mount.h>
+
+#include <libzfs.h>
+
+#undef verify /* both libzfs.h and zfs_context.h want to define this */
+
+#include "zinject.h"
+
+libzfs_handle_t *g_zfs;
+int zfs_fd;
+
+#ifndef ECKSUM
+#define ECKSUM EBADE
+#endif
+
+static const char *errtable[TYPE_INVAL] = {
+ "data",
+ "dnode",
+ "mos",
+ "mosdir",
+ "metaslab",
+ "config",
+ "bplist",
+ "spacemap",
+ "errlog",
+ "uber",
+ "nvlist"
+};
+
+static err_type_t
+name_to_type(const char *arg)
+{
+ int i;
+ for (i = 0; i < TYPE_INVAL; i++)
+ if (strcmp(errtable[i], arg) == 0)
+ return (i);
+
+ return (TYPE_INVAL);
+}
+
+static const char *
+type_to_name(uint64_t type)
+{
+ switch (type) {
+ case DMU_OT_OBJECT_DIRECTORY:
+ return ("mosdir");
+ case DMU_OT_OBJECT_ARRAY:
+ return ("metaslab");
+ case DMU_OT_PACKED_NVLIST:
+ return ("config");
+ case DMU_OT_BPLIST:
+ return ("bplist");
+ case DMU_OT_SPACE_MAP:
+ return ("spacemap");
+ case DMU_OT_ERROR_LOG:
+ return ("errlog");
+ default:
+ return ("-");
+ }
+}
+
+
+/*
+ * Print usage message.
+ */
+void
+usage(void)
+{
+ (void) printf(
+ "usage:\n"
+ "\n"
+ "\tzinject\n"
+ "\n"
+ "\t\tList all active injection records.\n"
+ "\n"
+ "\tzinject -c <id|all>\n"
+ "\n"
+ "\t\tClear the particular record (if given a numeric ID), or\n"
+ "\t\tall records if 'all' is specificed.\n"
+ "\n"
+ "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+ "\t\tInject a fault into a particular device or the device's\n"
+ "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
+ "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
+ "\n"
+ "\tzinject -b objset:object:level:blkid pool\n"
+ "\n"
+ "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
+ "\t\tspecified by the remaining tuple. Each number is in\n"
+ "\t\thexidecimal, and only one block can be specified.\n"
+ "\n"
+ "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
+ "\t [-a] [-m] [-u] [-f freq] <object>\n"
+ "\n"
+ "\t\tInject an error into the object specified by the '-t' option\n"
+ "\t\tand the object descriptor. The 'object' parameter is\n"
+ "\t\tinterperted depending on the '-t' option.\n"
+ "\n"
+ "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
+ "\t\t-e\tInject a specific error. Must be either 'io' or\n"
+ "\t\t\t'checksum'. Default is 'io'.\n"
+ "\t\t-l\tInject error at a particular block level. Default is "
+ "0.\n"
+ "\t\t-m\tAutomatically remount underlying filesystem.\n"
+ "\t\t-r\tInject error over a particular logical range of an\n"
+ "\t\t\tobject. Will be translated to the appropriate blkid\n"
+ "\t\t\trange according to the object's properties.\n"
+ "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
+ "\t\t\tassociated object.\n"
+ "\t\t-u\tUnload the associated pool. Can be specified with only\n"
+ "\t\t\ta pool object.\n"
+ "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
+ "\t\t\ta percentage between 1 and 100.\n"
+ "\n"
+ "\t-t data\t\tInject an error into the plain file contents of a\n"
+ "\t\t\tfile. The object must be specified as a complete path\n"
+ "\t\t\tto a file on a ZFS filesystem.\n"
+ "\n"
+ "\t-t dnode\tInject an error into the metadnode in the block\n"
+ "\t\t\tcorresponding to the dnode for a file or directory. The\n"
+ "\t\t\t'-r' option is incompatible with this mode. The object\n"
+ "\t\t\tis specified as a complete path to a file or directory\n"
+ "\t\t\ton a ZFS filesystem.\n"
+ "\n"
+ "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
+ "\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
+ "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
+ "\t\t\tthe poolname.\n");
+}
+
+static int
+iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
+ void *data)
+{
+ zfs_cmd_t zc;
+ int ret;
+
+ zc.zc_guid = 0;
+
+ while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
+ if ((ret = func((int)zc.zc_guid, zc.zc_name,
+ &zc.zc_inject_record, data)) != 0)
+ return (ret);
+
+ return (0);
+}
+
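As a usage sketch (not present in the original file), a caller-supplied callback can use this same iterator to act on only a subset of handlers; the pool name "tank" is hypothetical.

/* ARGSUSED */
static int
count_tank_handlers(int id, const char *pool, zinject_record_t *record,
    void *data)
{
	int *count = data;

	/* Count only handlers registered against the hypothetical pool "tank". */
	if (strcmp(pool, "tank") == 0)
		*count += 1;

	return (0);
}

/*
 * A caller inside this file could then do:
 *
 *	int count = 0;
 *	(void) iter_handlers(count_tank_handlers, &count);
 */
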
+static int
+print_data_handler(int id, const char *pool, zinject_record_t *record,
+ void *data)
+{
+ int *count = data;
+
+ if (record->zi_guid != 0)
+ return (0);
+
+ if (*count == 0) {
+ (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n",
+ "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE");
+ (void) printf("--- --------------- ------ "
+ "------ -------- --- ---------------\n");
+ }
+
+ *count += 1;
+
+ (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool,
+ (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
+ type_to_name(record->zi_type), record->zi_level);
+
+ if (record->zi_start == 0 &&
+ record->zi_end == -1ULL)
+ (void) printf("all\n");
+ else
+ (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
+ (u_longlong_t)record->zi_end);
+
+ return (0);
+}
+
+static int
+print_device_handler(int id, const char *pool, zinject_record_t *record,
+ void *data)
+{
+ int *count = data;
+
+ if (record->zi_guid == 0)
+ return (0);
+
+ if (*count == 0) {
+ (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
+ (void) printf("--- --------------- ----------------\n");
+ }
+
+ *count += 1;
+
+ (void) printf("%3d %-15s %llx\n", id, pool,
+ (u_longlong_t)record->zi_guid);
+
+ return (0);
+}
+
+/*
+ * Print all registered error handlers. Returns the number of handlers
+ * registered.
+ */
+static int
+print_all_handlers(void)
+{
+ int count = 0;
+
+ (void) iter_handlers(print_device_handler, &count);
+ (void) printf("\n");
+ count = 0;
+ (void) iter_handlers(print_data_handler, &count);
+
+ return (count);
+}
+
+/* ARGSUSED */
+static int
+cancel_one_handler(int id, const char *pool, zinject_record_t *record,
+ void *data)
+{
+ zfs_cmd_t zc;
+
+ zc.zc_guid = (uint64_t)id;
+
+ if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+ (void) fprintf(stderr, "failed to remove handler %d: %s\n",
+ id, strerror(errno));
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * Remove all fault injection handlers.
+ */
+static int
+cancel_all_handlers(void)
+{
+ int ret = iter_handlers(cancel_one_handler, NULL);
+
+ (void) printf("removed all registered handlers\n");
+
+ return (ret);
+}
+
+/*
+ * Remove a specific fault injection handler.
+ */
+static int
+cancel_handler(int id)
+{
+ zfs_cmd_t zc;
+
+ zc.zc_guid = (uint64_t)id;
+
+ if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
+ (void) fprintf(stderr, "failed to remove handler %d: %s\n",
+ id, strerror(errno));
+ return (1);
+ }
+
+ (void) printf("removed handler %d\n", id);
+
+ return (0);
+}
+
+/*
+ * Register a new fault injection handler.
+ */
+static int
+register_handler(const char *pool, int flags, zinject_record_t *record,
+ int quiet)
+{
+ zfs_cmd_t zc;
+
+ (void) strcpy(zc.zc_name, pool);
+ zc.zc_inject_record = *record;
+ zc.zc_guid = flags;
+
+ if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
+ (void) fprintf(stderr, "failed to add handler: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ if (flags & ZINJECT_NULL)
+ return (0);
+
+ if (quiet) {
+ (void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
+ } else {
+ (void) printf("Added handler %llu with the following "
+ "properties:\n", (u_longlong_t)zc.zc_guid);
+ (void) printf(" pool: %s\n", pool);
+ if (record->zi_guid) {
+ (void) printf(" vdev: %llx\n",
+ (u_longlong_t)record->zi_guid);
+ } else {
+ (void) printf("objset: %llu\n",
+ (u_longlong_t)record->zi_objset);
+ (void) printf("object: %llu\n",
+ (u_longlong_t)record->zi_object);
+ (void) printf(" type: %llu\n",
+ (u_longlong_t)record->zi_type);
+ (void) printf(" level: %d\n", record->zi_level);
+ if (record->zi_start == 0 &&
+ record->zi_end == -1ULL)
+ (void) printf(" range: all\n");
+ else
+ (void) printf(" range: [%llu, %llu)\n",
+ (u_longlong_t)record->zi_start,
+ (u_longlong_t)record->zi_end);
+ }
+ }
+
+ return (0);
+}
+
+int
+main(int argc, char **argv)
+{
+ int c;
+ char *range = NULL;
+ char *cancel = NULL;
+ char *end;
+ char *raw = NULL;
+ char *device = NULL;
+ int level = 0;
+ int quiet = 0;
+ int error = 0;
+ int domount = 0;
+ err_type_t type = TYPE_INVAL;
+ err_type_t label = TYPE_INVAL;
+ zinject_record_t record = { 0 };
+ char pool[MAXNAMELEN];
+ char dataset[MAXNAMELEN];
+ zfs_handle_t *zhp;
+ int ret;
+ int flags = 0;
+
+ if ((g_zfs = libzfs_init()) == NULL) {
+ (void) fprintf(stderr, "internal error: failed to "
+ "initialize ZFS library\n");
+ return (1);
+ }
+
+ libzfs_print_on_error(g_zfs, B_TRUE);
+
+ if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+ (void) fprintf(stderr, "failed to open ZFS device\n");
+ return (1);
+ }
+
+ if (argc == 1) {
+ /*
+ * No arguments. Print the available handlers. If there are no
+ * available handlers, direct the user to '-h' for help
+ * information.
+ */
+ if (print_all_handlers() == 0) {
+ (void) printf("No handlers registered.\n");
+ (void) printf("Run 'zinject -h' for usage "
+ "information.\n");
+ }
+
+ return (0);
+ }
+
+ while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+ switch (c) {
+ case 'a':
+ flags |= ZINJECT_FLUSH_ARC;
+ break;
+ case 'b':
+ raw = optarg;
+ break;
+ case 'c':
+ cancel = optarg;
+ break;
+ case 'd':
+ device = optarg;
+ break;
+ case 'e':
+ if (strcasecmp(optarg, "io") == 0) {
+ error = EIO;
+ } else if (strcasecmp(optarg, "checksum") == 0) {
+ error = ECKSUM;
+ } else if (strcasecmp(optarg, "nxio") == 0) {
+ error = ENXIO;
+ } else {
+ (void) fprintf(stderr, "invalid error type "
+ "'%s': must be 'io', 'checksum' or "
+ "'nxio'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case 'f':
+ record.zi_freq = atoi(optarg);
+ if (record.zi_freq < 1 || record.zi_freq > 100) {
+ (void) fprintf(stderr, "frequency range must "
+ "be in the range (0, 100]\n");
+ return (1);
+ }
+ break;
+ case 'h':
+ usage();
+ return (0);
+ case 'l':
+ level = (int)strtol(optarg, &end, 10);
+ if (*end != '\0') {
+ (void) fprintf(stderr, "invalid level '%s': "
+ "must be an integer\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case 'm':
+ domount = 1;
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ case 'r':
+ range = optarg;
+ break;
+ case 't':
+ if ((type = name_to_type(optarg)) == TYPE_INVAL &&
+ !MOS_TYPE(type)) {
+ (void) fprintf(stderr, "invalid type '%s'\n",
+ optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case 'u':
+ flags |= ZINJECT_UNLOAD_SPA;
+ break;
+ case 'L':
+ if ((label = name_to_type(optarg)) == TYPE_INVAL &&
+ !LABEL_TYPE(type)) {
+ (void) fprintf(stderr, "invalid label type "
+ "'%s'\n", optarg);
+ usage();
+ return (1);
+ }
+ break;
+ case ':':
+ (void) fprintf(stderr, "option -%c requires an "
+ "operand\n", optopt);
+ usage();
+ return (1);
+ case '?':
+ (void) fprintf(stderr, "invalid option '%c'\n",
+ optopt);
+ usage();
+ return (2);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (cancel != NULL) {
+ /*
+ * '-c' is invalid with any other options.
+ */
+ if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+ level != 0) {
+ (void) fprintf(stderr, "cancel (-c) incompatible with "
+ "any other options\n");
+ usage();
+ return (2);
+ }
+ if (argc != 0) {
+ (void) fprintf(stderr, "extraneous argument to '-c'\n");
+ usage();
+ return (2);
+ }
+
+ if (strcmp(cancel, "all") == 0) {
+ return (cancel_all_handlers());
+ } else {
+ int id = (int)strtol(cancel, &end, 10);
+ if (*end != '\0') {
+ (void) fprintf(stderr, "invalid handle id '%s':"
+ " must be an integer or 'all'\n", cancel);
+ usage();
+ return (1);
+ }
+ return (cancel_handler(id));
+ }
+ }
+
+ if (device != NULL) {
+ /*
+ * Device (-d) injection uses a completely different mechanism
+ * for doing injection, so handle it separately here.
+ */
+ if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+ level != 0) {
+ (void) fprintf(stderr, "device (-d) incompatible with "
+ "data error injection\n");
+ usage();
+ return (2);
+ }
+
+ if (argc != 1) {
+ (void) fprintf(stderr, "device (-d) injection requires "
+ "a single pool name\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ dataset[0] = '\0';
+
+ if (error == ECKSUM) {
+ (void) fprintf(stderr, "device error type must be "
+ "'io' or 'nxio'\n");
+ return (1);
+ }
+
+ if (translate_device(pool, device, label, &record) != 0)
+ return (1);
+ if (!error)
+ error = ENXIO;
+ } else if (raw != NULL) {
+ if (range != NULL || type != TYPE_INVAL || level != 0) {
+ (void) fprintf(stderr, "raw (-b) format with "
+ "any other options\n");
+ usage();
+ return (2);
+ }
+
+ if (argc != 1) {
+ (void) fprintf(stderr, "raw (-b) format expects a "
+ "single pool name\n");
+ usage();
+ return (2);
+ }
+
+ (void) strcpy(pool, argv[0]);
+ dataset[0] = '\0';
+
+ if (error == ENXIO) {
+ (void) fprintf(stderr, "data error type must be "
+ "'checksum' or 'io'\n");
+ return (1);
+ }
+
+ if (translate_raw(raw, &record) != 0)
+ return (1);
+ if (!error)
+ error = EIO;
+ } else if (type == TYPE_INVAL) {
+ if (flags == 0) {
+ (void) fprintf(stderr, "at least one of '-b', '-d', "
+ "'-t', '-a', or '-u' must be specified\n");
+ usage();
+ return (2);
+ }
+
+ if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
+ (void) strcpy(pool, argv[0]);
+ dataset[0] = '\0';
+ } else if (argc != 0) {
+ (void) fprintf(stderr, "extraneous argument for "
+ "'-f'\n");
+ usage();
+ return (2);
+ }
+
+ flags |= ZINJECT_NULL;
+ } else {
+ if (argc != 1) {
+ (void) fprintf(stderr, "missing object\n");
+ usage();
+ return (2);
+ }
+
+ if (error == ENXIO) {
+ (void) fprintf(stderr, "data error type must be "
+ "'checksum' or 'io'\n");
+ return (1);
+ }
+
+ if (translate_record(type, argv[0], range, level, &record, pool,
+ dataset) != 0)
+ return (1);
+ if (!error)
+ error = EIO;
+ }
+
+ /*
+ * If this is pool-wide metadata, unmount everything. The ioctl() will
+ * unload the pool, so that we trigger spa-wide reopen of metadata next
+ * time we access the pool.
+ */
+ if (dataset[0] != '\0' && domount) {
+ if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
+ return (1);
+
+ if (zfs_unmount(zhp, NULL, 0) != 0)
+ return (1);
+ }
+
+ record.zi_error = error;
+
+ ret = register_handler(pool, flags, &record, quiet);
+
+ if (dataset[0] != '\0' && domount)
+ ret = (zfs_mount(zhp, NULL, 0) != 0);
+
+ libzfs_fini(g_zfs);
+
+ return (ret);
+}
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.h b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
new file mode 100644
index 0000000..adc3efe
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.h
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZINJECT_H
+#define _ZINJECT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zfs_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ TYPE_DATA, /* plain file contents */
+ TYPE_DNODE, /* metadnode contents */
+ TYPE_MOS, /* all MOS data */
+ TYPE_MOSDIR, /* MOS object directory */
+ TYPE_METASLAB, /* metaslab objects */
+ TYPE_CONFIG, /* MOS config */
+ TYPE_BPLIST, /* block pointer list */
+ TYPE_SPACEMAP, /* space map objects */
+ TYPE_ERRLOG, /* persistent error log */
+ TYPE_LABEL_UBERBLOCK, /* label specific uberblock */
+ TYPE_LABEL_NVLIST, /* label specific nvlist */
+ TYPE_INVAL
+} err_type_t;
+
+#define MOS_TYPE(t) \
+ ((t) >= TYPE_MOS && (t) < TYPE_LABEL_UBERBLOCK)
+
+#define LABEL_TYPE(t) \
+ ((t) >= TYPE_LABEL_UBERBLOCK && (t) < TYPE_INVAL)
+
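Both macros rely purely on the ordering of err_type_t above. A few illustrative evaluations follow (not part of the original header), assuming the same include environment as zinject.c:

#include <assert.h>
#include <libzfs.h>

#include "zinject.h"

int
main(void)
{
	assert(MOS_TYPE(TYPE_CONFIG));		/* falls in [TYPE_MOS, TYPE_LABEL_UBERBLOCK) */
	assert(!MOS_TYPE(TYPE_DATA));
	assert(LABEL_TYPE(TYPE_LABEL_NVLIST));	/* falls in [TYPE_LABEL_UBERBLOCK, TYPE_INVAL) */
	assert(!LABEL_TYPE(TYPE_ERRLOG));
	return (0);
}
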
+int translate_record(err_type_t type, const char *object, const char *range,
+ int level, zinject_record_t *record, char *poolname, char *dataset);
+int translate_raw(const char *raw, zinject_record_t *record);
+int translate_device(const char *pool, const char *device,
+ err_type_t label_type, zinject_record_t *record);
+void usage(void);
+
+extern libzfs_handle_t *g_zfs;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZINJECT_H */
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
index 95ae008..a7967d7 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8
@@ -17,8 +17,8 @@
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
-.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved.
-.TH zpool 1M "14 Nov 2006" "SunOS 5.11" "System Administration Commands"
+.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.TH zpool 1M "13 Nov 2007" "SunOS 5.11" "System Administration Commands"
.SH NAME
zpool \- configures ZFS storage pools
.SH SYNOPSIS
@@ -29,7 +29,8 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR
+\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR]
+ \fIpool\fR \fIvdev\fR ...
.fi
.LP
@@ -39,22 +40,22 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR
+\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...
.fi
.LP
.nf
-\fBzpool remove\fR \fIpool\fR \fIvdev\fR
+\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool \fR \fBlist\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]*] [\fIpool\fR] ...
+\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
.fi
.LP
.nf
-\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]
+\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
.fi
.LP
@@ -64,17 +65,17 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
+\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool online\fR \fIpool\fR \fIdevice\fR ...
+\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...
.fi
.LP
.nf
-\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...
+\fBzpool clear\fR \fIpool\fR [\fIdevice\fR]
.fi
.LP
@@ -99,23 +100,24 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool export\fR [\fB-f\fR] \fIpool\fR
+\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
.fi
.LP
.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-p\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR
.fi
.LP
.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o \fIopts\fR\fR] [\fB-R \fR\fIroot\fR] \fIpool\fR | \fIid\fR
- [\fInewpool\fR]
+\fBzpool import\fR [\fB-o \fImntopts\fR\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+ [\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR |\fIid\fR [\fInewpool\fR]
.fi
.LP
.nf
-\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]
+\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...
.fi
.LP
@@ -130,20 +132,33 @@ zpool \- configures ZFS storage pools
.LP
.nf
-\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]
+\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...
+.fi
+
+.LP
+.nf
+\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...
+.fi
+
+.LP
+.nf
+\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...
.fi
.LP
.nf
-\fBzpool history\fR [\fIpool\fR] ...
+\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR
.fi
.SH DESCRIPTION
+.sp
.LP
The \fBzpool\fR command configures \fBZFS\fR storage pools. A storage pool is a collection of devices that provides physical storage and data replication for \fBZFS\fR datasets.
+.sp
.LP
All datasets within a storage pool share the same space. See \fBzfs\fR(1M) for information on managing datasets.
.SS "Virtual Devices (vdevs)"
+.sp
.LP
A "virtual device" describes a single device or a collection of devices organized according to certain performance and fault characteristics. The following virtual devices are supported:
.sp
@@ -202,8 +217,8 @@ A variation on \fBRAID-5\fR that allows for better distribution of parity and el
A \fBraidz\fR group can have either single- or double-parity, meaning that the \fBraidz\fR group can sustain one or two failures respectively without losing any data. The \fBraidz1\fR \fBvdev\fR type specifies a single-parity \fBraidz\fR group
and the \fBraidz2\fR \fBvdev\fR type specifies a double-parity \fBraidz\fR group. The \fBraidz\fR \fBvdev\fR type is an alias for \fBraidz1\fR.
.sp
-A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand one device failing before
-data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9.
+A \fBraidz\fR group with \fIN\fR disks of size \fIX\fR with \fIP\fR parity disks can hold approximately (\fIN-P\fR)*\fIX\fR bytes and can withstand \fIP\fR device(s)
+failing before data integrity is compromised. The minimum number of devices in a \fBraidz\fR group is one more than the number of parity disks. The recommended number is between 3 and 9 to help increase performance.
.RE
.sp
@@ -217,11 +232,37 @@ data integrity is compromised. The minimum number of devices in a \fBraidz\fR gr
A special pseudo-\fBvdev\fR which keeps track of available hot spares for a pool. For more information, see the "Hot Spares" section.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fBlog\fR
+.ad
+.RS 10n
+.rt
+A separate intent log device. If more than one log device is specified, then writes are load-balanced between devices. Log devices can be mirrored. However, \fBraidz\fR and \fBraidz2\fR are not supported for the intent log. For more information, see the "Intent
+Log" section.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcache\fR
+.ad
+.RS 10n
+.rt
+A device used to cache storage pool data. A cache device cannot be mirrored or part of a \fBraidz\fR or \fBraidz2\fR configuration. For more information, see the "Cache Devices" section.
+.RE
+
+.sp
.LP
Virtual devices cannot be nested, so a mirror or \fBraidz\fR virtual device can only contain files or disks. Mirrors of mirrors (or other combinations) are not allowed.
+.sp
.LP
A pool can have any number of virtual devices at the top of the configuration (known as "root vdevs"). Data is dynamically distributed across all top-level devices to balance data among devices. As new virtual devices are added, \fBZFS\fR automatically places data
on the newly available devices.
+.sp
.LP
Virtual devices are specified one at a time on the command line, separated by whitespace. The keywords "mirror" and "raidz" are used to distinguish where a group ends and another begins. For example, the following creates two root vdevs, each a mirror of two disks:
.sp
@@ -233,15 +274,119 @@ Virtual devices are specified one at a time on the command line, separated by wh
.sp
.SS "Device Failure and Recovery"
+.sp
.LP
\fBZFS\fR supports a rich set of mechanisms for handling device failure and data corruption. All metadata and data is checksummed, and \fBZFS\fR automatically repairs bad data from a good copy when corruption is detected.
+.sp
.LP
In order to take advantage of these features, a pool must make use of some form of redundancy, using either mirrored or \fBraidz\fR groups. While \fBZFS\fR supports running in a non-redundant configuration, where each root vdev is simply a disk or file, this is
strongly discouraged. A single case of bit corruption can render some or all of your data unavailable.
+.sp
.LP
A pool's health status is described by one of three states: online, degraded, or faulted. An online pool has all devices operating normally. A degraded pool is one in which one or more devices have failed, but the data is still available due to a redundant configuration. A faulted pool has
-one or more failed devices, and there is insufficient redundancy to replicate the missing data.
+corrupted metadata, or one or more faulted devices, and insufficient replicas to continue functioning.
+.sp
+.LP
+The health of the top-level vdev, such as mirror or \fBraidz\fR device, is potentially impacted by the state of its associated vdevs, or component devices. A top-level vdev or component device is in one of the following states:
+.sp
+.ne 2
+.mk
+.na
+\fB\fBDEGRADED\fR\fR
+.ad
+.RS 12n
+.rt
+One or more top-level vdevs is in the degraded state because one or more component devices are offline. Sufficient replicas exist to continue functioning.
+.sp
+One or more component devices is in the degraded or faulted state, but sufficient replicas exist to continue functioning. The underlying conditions are as follows:
+.RS +4
+.TP
+.ie t \(bu
+.el o
+The number of checksum errors exceeds acceptable levels and the device is degraded as an indication that something may be wrong. \fBZFS\fR continues to use the device as necessary.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+The number of I/O errors exceeds acceptable levels. The device could not be marked as faulted because there are insufficient replicas to continue functioning.
+.RE
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBFAULTED\fR\fR
+.ad
+.RS 12n
+.rt
+One or more top-level vdevs is in the faulted state because one or more component devices are offline. Insufficient replicas exist to continue functioning.
+.sp
+One or more component devices is in the faulted state, and insufficient replicas exist to continue functioning. The underlying conditions are as follows:
+.RS +4
+.TP
+.ie t \(bu
+.el o
+The device could be opened, but the contents did not match expected values.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+The number of I/O errors exceeds acceptable levels and the device is faulted to prevent further use of the device.
+.RE
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBOFFLINE\fR\fR
+.ad
+.RS 12n
+.rt
+The device was explicitly taken offline by the "\fBzpool offline\fR" command.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBONLINE\fR\fR
+.ad
+.RS 12n
+.rt
+The device is online and functioning.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBREMOVED\fR\fR
+.ad
+.RS 12n
+.rt
+The device was physically removed while the system was running. Device removal detection is hardware-dependent and may not be supported on all platforms.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBUNAVAIL\fR\fR
+.ad
+.RS 12n
+.rt
+The device could not be opened. If a pool is imported when a device was unavailable, then the device will be identified by a unique identifier instead of its path since the path was never correct in the first place.
+.RE
+
+.sp
+.LP
+If a device is removed and later re-attached to the system, \fBZFS\fR attempts to put the device online automatically. Device attach detection is hardware-dependent and might not be supported on all platforms.
.SS "Hot Spares"
+.sp
.LP
\fBZFS\fR allows devices to be associated with pools as "hot spares". These devices are not actively used in the pool, but when an active device fails, it is automatically replaced by a hot spare. To create a pool with hot spares, specify a "spare" \fBvdev\fR with any number of devices. For example,
.sp
@@ -252,22 +397,261 @@ one or more failed devices, and there is insufficient redundancy to replicate th
.in -2
.sp
+.sp
.LP
-Spares can be shared across multiple pools, and can be added with the "zpool add" command and removed with the "zpool remove" command. Once a spare replacement is initiated, a new "spare" \fBvdev\fR is created within the configuration that
-will remain there until the original device is replaced. At this point, the hot spare becomes available again if another device fails.
+Spares can be shared across multiple pools, and can be added with the "\fBzpool add\fR" command and removed with the "\fBzpool remove\fR" command. Once a spare replacement is initiated, a new "spare" \fBvdev\fR is
+created within the configuration that will remain there until the original device is replaced. At this point, the hot spare becomes available again if another device fails.
+.sp
.LP
An in-progress spare replacement can be cancelled by detaching the hot spare. If the original faulted device is detached, then the hot spare assumes its place in the configuration, and is removed from the spare list of all active pools.
-.SS "Alternate Root Pools"
+.sp
+.LP
+Spares cannot replace log devices.
+.SS "Intent Log"
+.sp
+.LP
+The \fBZFS\fR Intent Log (\fBZIL\fR) satisfies \fBPOSIX\fR requirements for synchronous transactions. For instance, databases often require their transactions to be on stable storage devices when returning from a system call. \fBNFS\fR and
+other applications can also use \fBfsync\fR() to ensure data stability. By default, the intent log is allocated from blocks within the main pool. However, it might be possible to get better performance using separate intent log devices such as \fBNVRAM\fR or a dedicated
+disk. For example:
+.sp
+.in +2
+.nf
+\fB# zpool create pool c0d0 c1d0 log c2d0\fR
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+Multiple log devices can also be specified, and they can be mirrored. See the EXAMPLES section for an example of mirroring multiple log devices.
+.sp
+.LP
+Log devices can be added, replaced, attached, detached, and imported and exported as part of the larger pool.
+.SS "Cache Devices"
+.sp
+.LP
+Devices can be added to a storage pool as "cache devices." These devices provide an additional layer of caching between main memory and disk. For read-heavy workloads, where the working set size is much larger than what can be cached in main memory, using cache devices allows
+much more of this working set to be served from low latency media. Using cache devices provides the greatest performance improvement for random read workloads of mostly static content.
+.sp
+.LP
+To create a pool with cache devices, specify a "cache" \fBvdev\fR with any number of devices. For example:
+.sp
+.in +2
+.nf
+\fB# zpool create pool c0d0 c1d0 cache c2d0 c3d0\fR
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+Cache devices cannot be mirrored or part of a \fBraidz\fR configuration. If a read error is encountered on a cache device, that read \fBI/O\fR is reissued to the original storage pool device, which might be part of a mirrored or \fBraidz\fR configuration.
+.sp
+.LP
+The content of the cache devices is considered volatile, as is the case with other system caches.
+.SS "Properties"
+.sp
+.LP
+Each pool has several properties associated with it. Some properties are read-only statistics while others are configurable and change the behavior of the pool. The following are read-only properties:
+.sp
+.ne 2
+.mk
+.na
+\fBavailable\fR
+.ad
+.RS 20n
+.rt
+Amount of storage available within the pool. This property can also be referred to by its shortened column name, "avail".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcapacity\fR
+.ad
+.RS 20n
+.rt
+Percentage of pool space used. This property can also be referred to by its shortened column name, "cap".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBhealth\fR
+.ad
+.RS 20n
+.rt
+The current health of the pool. Health can be "\fBONLINE\fR", "\fBDEGRADED\fR", "\fBFAULTED\fR", "\fBOFFLINE\fR", "\fBREMOVED\fR", or "\fBUNAVAIL\fR".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBguid\fR
+.ad
+.RS 20n
+.rt
+A unique identifier for the pool.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBsize\fR
+.ad
+.RS 20n
+.rt
+Total size of the storage pool.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBused\fR
+.ad
+.RS 20n
+.rt
+Amount of storage space used within the pool.
+.RE
+
+.sp
.LP
-The "zpool create -R" and "zpool import -R" commands allow users to create and import a pool with a different root path. By default, whenever a pool is created or imported on a system, it is permanently added so that it is available whenever the system boots. For
-removable media, or when in recovery situations, this may not always be desirable. An alternate root pool does not persist on the system. Instead, it exists only until exported or the system is rebooted, at which point it will have to be imported again.
+These space usage properties report actual physical space available to the storage pool. The physical space can be different from the total amount of space that any contained datasets can actually use. The amount of space used in a \fBraidz\fR configuration depends on the characteristics
+of the data being written. In addition, \fBZFS\fR reserves some space for internal accounting that the \fBzfs\fR(1M) command takes into account, but the \fBzpool\fR command does not. For non-full pools of a reasonable size, these effects should be invisible. For small pools, or pools that are close to being completely full, these discrepancies may become more noticeable.
+.sp
.LP
-In addition, all mount points in the pool are prefixed with the given root, so a pool can be constrained to a particular area of the file system. This is most useful when importing unknown pools from removable media, as the mount points of any file systems cannot be trusted.
+The following property can be set at creation time and import time:
+.sp
+.ne 2
+.mk
+.na
+\fB\fBaltroot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Alternate root directory. If set, this directory is prepended to any mount points within the pool. This can be used when examining an unknown pool where the mount points cannot be trusted, or in an alternate boot environment, where the typical paths are not valid. \fBaltroot\fR is
+not a persistent property. It is valid only while the system is up. Setting \fBaltroot\fR defaults to using \fBcachefile\fR=none, though this may be overridden using an explicit setting.
+.RE
+
+.sp
.LP
-When creating an alternate root pool, the default mount point is "/", rather than the normal default "/\fIpool\fR".
+The following properties can be set at creation time and import time, and later changed with the "\fBzpool set\fR" command:
+.sp
+.ne 2
+.mk
+.na
+\fB\fBautoreplace\fR=on | off\fR
+.ad
+.sp .6
+.RS 4n
+Controls automatic device replacement. If set to "\fBoff\fR", device replacement must be initiated by the administrator by using the "\fBzpool replace\fR" command. If set to "\fBon\fR", any new device, found
+in the same physical location as a device that previously belonged to the pool, is automatically formatted and replaced. The default behavior is "\fBoff\fR". This property can also be referred to by its shortened column name, "replace".
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBbootfs\fR=\fIpool\fR/\fIdataset\fR\fR
+.ad
+.sp .6
+.RS 4n
+Identifies the default bootable dataset for the root pool. This property is expected to be set mainly by the installation and upgrade programs.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBcachefile\fR=\fIpath\fR | "none"\fR
+.ad
+.sp .6
+.RS 4n
+Controls the location of where the pool configuration is cached. Discovering all pools on system startup requires a cached copy of the configuration data that is stored on the root file system. All pools in this cache are automatically imported when the system boots. Some environments,
+such as install and clustering, need to cache this information in a different location so that pools are not automatically imported. Setting this property caches the pool configuration in a different location that can later be imported with "\fBzpool import -c\fR". Setting
+it to the special value "\fBnone\fR" creates a temporary pool that is never cached, and the special value \fB\&''\fR (empty string) uses the default location.
+.sp
+Multiple pools can share the same cache file. Because the kernel destroys and recreates this file when pools are added and removed, care should be taken when attempting to access this file. When the last pool using a \fBcachefile\fR is exported or destroyed, the file is removed.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBdelegation\fR=\fBon\fR | \fBoff\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls whether a non-privileged user is granted access based on the dataset permissions defined on the dataset. See \fBzfs\fR(1M) for more information
+on \fBZFS\fR delegated administration.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBfailmode\fR=\fBwait\fR | \fBcontinue\fR | \fBpanic\fR\fR
+.ad
+.sp .6
+.RS 4n
+Controls the system behavior in the event of catastrophic pool failure. This condition is typically a result of a loss of connectivity to the underlying storage device(s) or a failure of all devices within the pool. The behavior of such an event is determined as follows:
+.sp
+.ne 2
+.mk
+.na
+\fBwait\fR
+.ad
+.RS 12n
+.rt
+Blocks all \fBI/O\fR access until the device connectivity is recovered and the errors are cleared. This is the default behavior.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBcontinue\fR
+.ad
+.RS 12n
+.rt
+Returns \fBEIO\fR to any new write \fBI/O\fR requests but allows reads to any of the remaining healthy devices. Any write requests that have yet to be committed to disk would be blocked.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fBpanic\fR
+.ad
+.RS 12n
+.rt
+Prints out a message to the console and generates a system crash dump.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fBversion\fR=\fIversion\fR\fR
+.ad
+.sp .6
+.RS 4n
+The current on-disk version of the pool. This can be increased, but never decreased. The preferred method of updating pools is with the "\fBzpool upgrade\fR" command, though this property can be used when a specific version is needed for backwards compatibility.
+This property can be any number between 1 and the current version reported by "\fBzpool upgrade -v\fR". The special value "\fBcurrent\fR" is an alias for the latest supported version.
+.RE
+
.SS "Subcommands"
+.sp
.LP
All subcommands that modify state are logged persistently to the pool in their original form.
+.sp
.LP
The \fBzpool\fR command provides subcommands to create and destroy storage pools, add capacity to storage pools, and provide information about the storage pools. The following subcommands are supported:
.sp
@@ -285,12 +669,12 @@ Displays a help message.
.ne 2
.mk
.na
-\fB\fBzpool create\fR [\fB-fn\fR] [\fB-R\fR \fIroot\fR] [\fB-m\fR \fImountpoint\fR] \fIpool\fR \fIvdev ...\fR\fR
+\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
Creates a new storage pool containing the virtual devices specified on the command line. The pool name must begin with a letter, and can only contain alphanumeric characters as well as underscore ("_"), dash ("-"), and period ("."). The pool
-names "mirror", "raidz", and "spare" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
+names "mirror", "raidz", "spare" and "log" are reserved, as are names beginning with the pattern "c[0-9]". The \fBvdev\fR specification is described in the "Virtual Devices" section.
.sp
The command verifies that each device specified is accessible and not currently in use by another subsystem. There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevent a device from ever being used by \fBZFS\fR. Other uses,
such as having a preexisting \fBUFS\fR file system, can be overridden with the \fB-f\fR option.
@@ -305,8 +689,8 @@ Unless the \fB-R\fR option is specified, the default mount point is "/\fIpool\fR
.na
\fB\fB-f\fR\fR
.ad
-.RS 17n
-.rt
+.sp .6
+.RS 4n
Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting replication level. Not all devices can be overridden in this manner.
.RE
@@ -316,8 +700,8 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
.na
\fB\fB-n\fR\fR
.ad
-.RS 17n
-.rt
+.sp .6
+.RS 4n
Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
@@ -325,11 +709,22 @@ Displays the configuration that would be used without actually creating the pool
.ne 2
.mk
.na
+\fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Sets the given pool properties. See the "Properties" section for a list of valid properties that can be set.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fB-R\fR \fIroot\fR\fR
.ad
-.RS 17n
-.rt
-Creates the pool with an alternate \fIroot\fR. See the "Alternate Root Pools" section. The root dataset has its mount point set to "/" as part of this operation.
+.sp .6
+.RS 4n
+Equivalent to "-o cachefile=none,altroot=\fIroot\fR"
.RE
.sp
@@ -338,10 +733,9 @@ Creates the pool with an alternate \fIroot\fR. See the "Alternate Root Pools" se
.na
\fB\fB-m\fR \fImountpoint\fR\fR
.ad
-.RS 17n
-.rt
-Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR". The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount
-points, see \fBzfs\fR(1M).
+.sp .6
+.RS 4n
+Sets the mount point for the root dataset. The default mount point is "/\fIpool\fR" or "\fBaltroot\fR/\fIpool\fR" if \fBaltroot\fR is specified. The mount point must be an absolute path, "\fBlegacy\fR", or "\fBnone\fR". For more information on dataset mount points, see \fBzfs\fR(1M).
.RE
.RE
@@ -372,7 +766,7 @@ Forces any active datasets contained within the pool to be unmounted.
.ne 2
.mk
.na
-\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev ...\fR\fR
+\fB\fBzpool add\fR [\fB-fn\fR] \fIpool\fR \fIvdev\fR ...\fR
.ad
.sp .6
.RS 4n
@@ -400,26 +794,26 @@ Forces use of \fBvdev\fRs, even if they appear in use or specify a conflicting r
Displays the configuration that would be used without actually adding the \fBvdev\fRs. The actual pool creation can still fail due to insufficient privileges or device sharing.
.RE
-Do not add a disk that is currently configured as a quorum device to a zpool. Once a disk is in a zpool, that disk can then be configured as a quorum device.
+Do not add a disk that is currently configured as a quorum device to a zpool. After a disk is in the pool, that disk can then be configured as a quorum device.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool remove\fR \fIpool\fR \fIvdev\fR\fR
+\fB\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
-Removes the given \fBvdev\fR from the pool. This command currently only supports removing hot spares. Devices which are part of a mirror can be removed using the "zpool detach" command. \fBRaidz\fR and top-level \fBvdevs\fR cannot
-be removed from a pool.
+Removes the specified device from the pool. This command currently only supports removing hot spares and cache devices. Devices that are part of a mirrored configuration can be removed using the "\fBzpool detach\fR" command. Non-redundant and \fBraidz\fR devices
+cannot be removed from a pool.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIfield\fR[,\fIfield*\fR]] [\fIpool\fR] ...\fR
+\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
.ad
.sp .6
.RS 4n
@@ -439,36 +833,20 @@ Scripted mode. Do not display headers, and separate fields by a single tab inste
.ne 2
.mk
.na
-\fB\fB-o\fR \fIfield\fR\fR
+\fB\fB-o\fR \fIprops\fR\fR
.ad
.RS 12n
.rt
-Comma-separated list of fields to display. Each field must be one of:
-.sp
-.in +2
-.nf
-name Pool name
-size Total size
-used Amount of space used
-available Amount of space available
-capacity Percentage of pool space used
-health Health status
-.fi
-.in -2
-.sp
-
-The default is all fields.
+Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot"
.RE
-This command reports actual physical space available to the storage pool. The physical space can be different from the total amount of space that any contained datasets can actually use. The amount of space used in a \fBraidz\fR configuration depends on the characteristics of
-the data being written. In addition, \fBZFS\fR reserves some space for internal accounting that the \fBzfs\fR(1M) command takes into account, but the \fBzpool\fR command does not. For non-full pools of a reasonable size, these effects should be invisible. For small pools, or pools that are close to being completely full, these discrepancies may become more noticeable.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR [\fIcount\fR]]\fR
+\fB\fBzpool iostat\fR [\fB-v\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
.ad
.sp .6
.RS 4n
@@ -495,7 +873,7 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t
.ad
.sp .6
.RS 4n
-Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed.
+Displays the detailed health status for the given pools. If no \fIpool\fR is specified, then the status of each pool in the system is displayed. For more information on pool and device health, see the "Device Failure and Recovery" section.
.sp
If a scrub or resilver is in progress, this command reports the percentage done and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change.
.sp
@@ -526,13 +904,26 @@ Displays verbose data error information, printing out a complete list of all dat
.ne 2
.mk
.na
+\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Brings the specified physical device online.
+.sp
+This command is not applicable to spares or cache devices.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
\fB\fBzpool offline\fR [\fB-t\fR] \fIpool\fR \fIdevice\fR ...\fR
.ad
.sp .6
.RS 4n
Takes the specified physical device offline. While the \fIdevice\fR is offline, no attempt is made to read or write to the device.
.sp
-This command is not applicable to spares.
+This command is not applicable to spares or cache devices.
.sp
.ne 2
.mk
@@ -550,19 +941,6 @@ Temporary. Upon reboot, the specified physical device reverts to its previous st
.ne 2
.mk
.na
-\fB\fBzpool online\fR \fIpool\fR \fIdevice\fR ...\fR
-.ad
-.sp .6
-.RS 4n
-Brings the specified physical device online.
-.sp
-This command is not applicable to spares.
-.RE
-
-.sp
-.ne 2
-.mk
-.na
\fB\fBzpool clear\fR \fIpool\fR [\fIdevice\fR] ...\fR
.ad
.sp .6
@@ -616,8 +994,8 @@ Replaces \fIold_device\fR with \fInew_device\fR. This is equivalent to attaching
.sp
The size of \fInew_device\fR must be greater than or equal to the minimum size of all the devices in a mirror or \fBraidz\fR configuration.
.sp
-If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced. In this case, the new disk may have the same \fB/dev/dsk\fR path
-as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
+\fInew_device\fR is required if the pool is not redundant. If \fInew_device\fR is not specified, it defaults to \fIold_device\fR. This form of replacement is useful after an existing disk has failed and has been physically replaced.
+In this case, the new disk may have the same \fB/dev/dsk\fR path as the old device, even though it is actually a different disk. \fBZFS\fR recognizes this.
.sp
.ne 2
.mk
@@ -663,24 +1041,45 @@ Stop scrubbing.
.ne 2
.mk
.na
-\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
+\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR] [\fB-D\fR]\fR
.ad
.sp .6
.RS 4n
-Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+Lists pools available to import. If the \fB-d\fR option is not specified, this command searches for devices in "/dev/dsk". The \fB-d\fR option can be specified multiple times, and all directories are searched. If the device appears to be part of
+an exported pool, this command displays a summary of the pool with the name of the pool, a numeric identifier, as well as the \fIvdev\fR layout and current health of the device for each device or file. Destroyed pools (pools that were previously destroyed with the "\fBzpool destroy\fR" command) are not listed unless the \fB-D\fR option is specified.
.sp
-Before exporting the pool, all datasets within the pool are unmounted.
+The numeric identifier is unique, and can be used instead of the pool name when multiple exported pools of the same name are available.
.sp
-For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
+.ne 2
+.mk
+.na
+\fB\fB-c\fR \fIcachefile\fR\fR
+.ad
+.RS 16n
+.rt
+Reads configuration from the given \fBcachefile\fR that was created with the "\fBcachefile\fR" pool property. This \fBcachefile\fR is used instead of searching for devices.
+.RE
+
.sp
.ne 2
.mk
.na
-\fB\fB-f\fR\fR
+\fB\fB-d\fR \fIdir\fR\fR
.ad
-.RS 6n
+.RS 16n
.rt
-Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-D\fR\fR
+.ad
+.RS 16n
+.rt
+Lists destroyed pools only.
.RE
.RE
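For illustration (search directory hypothetical), listing importable pools from a non-default device directory, and separately listing destroyed pools, might look like:

    # zpool import -d /export/disks
    # zpool import -D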
@@ -689,59 +1088,56 @@ Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
.ne 2
.mk
.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR]\fR
+\fB\fBzpool import\fR [\fB-o\fR \fImntopts\fR] [ \fB-o\fR \fIproperty\fR=\fIvalue\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+[\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fB-a\fR\fR
.ad
.sp .6
.RS 4n
-Lists pools available to import. If the \fB-d\fR option is not specified, this command searches for devices in "/dev/dsk". The \fB-d\fR option can be specified multiple times, and all directories are searched. If the device appears to be part of
-an exported pool, this command displays a summary of the pool with the name of the pool, a numeric identifier, as well as the \fIvdev\fR layout and current health of the device for each device or file. Destroyed pools, pools that were previously destroyed with the "\fB-zpool destroy\fR" command, are not listed unless the \fB-D\fR option is specified.
-.sp
-The numeric identifier is unique, and can be used instead of the pool name when multiple exported pools of the same name are available.
+Imports all pools found in the search directories. Identical to the previous command, except that all pools with a sufficient number of devices available are imported. Destroyed pools (pools that were previously destroyed with the "\fBzpool destroy\fR"
+command) will not be imported unless the \fB-D\fR option is specified.
.sp
.ne 2
.mk
.na
-\fB\fB-d\fR \fIdir\fR\fR
+\fB\fB-o\fR \fImntopts\fR\fR
.ad
-.RS 10n
+.RS 21n
.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+Comma-separated list of mount options to use when mounting datasets within the pool. See \fBzfs\fR(1M) for a description of dataset properties and mount
+options.
.RE
.sp
.ne 2
.mk
.na
-\fB\fB-D\fR\fR
+\fB\fB-o\fR \fIproperty=value\fR\fR
.ad
-.RS 10n
+.RS 21n
.rt
-Lists destroyed pools only.
-.RE
-
+Sets the specified property on the imported pool. See the "Properties" section for more information on the available pool properties.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-o\fR \fIopts\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR | \fIid\fR [\fInewpool\fR]\fR
+\fB\fB-c\fR \fIcachefile\fR\fR
.ad
-.sp .6
-.RS 4n
-Imports a specific pool. A pool can be identified by its name or the numeric identifier. If \fInewpool\fR is specified, the pool is imported using the name \fInewpool\fR. Otherwise, it is imported with the same name as its exported name.
-.sp
-If a device is removed from a system without running "\fBzpool export\fR" first, the device appears as potentially active. It cannot be determined if this was a failed export, or whether the device is really in use from another host. To import a pool in this state,
-the \fB-f\fR option is required.
+.RS 21n
+.rt
+Reads configuration from the given \fBcachefile\fR that was created with the "\fBcachefile\fR" pool property. This \fBcachefile\fR is used instead of searching for devices.
+.RE
+
.sp
.ne 2
.mk
.na
\fB\fB-d\fR \fIdir\fR\fR
.ad
-.RS 11n
+.RS 21n
.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times. This option is incompatible with the \fB-c\fR option.
.RE
.sp
@@ -750,9 +1146,9 @@ Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified
.na
\fB\fB-D\fR\fR
.ad
-.RS 11n
+.RS 21n
.rt
-Imports destroyed pool. The \fB-f\fR option is also required.
+Imports destroyed pools only. The \fB-f\fR option is also required.
.RE
.sp
@@ -761,7 +1157,7 @@ Imports destroyed pool. The \fB-f\fR option is also required.
.na
\fB\fB-f\fR\fR
.ad
-.RS 11n
+.RS 21n
.rt
Forces import, even if the pool appears to be potentially active.
.RE
@@ -770,12 +1166,11 @@ Forces import, even if the pool appears to be potentially active.
.ne 2
.mk
.na
-\fB\fB-o\fR \fIopts\fR\fR
+\fB\fB-a\fR\fR
.ad
-.RS 11n
+.RS 21n
.rt
-Comma-separated list of mount options to use when mounting datasets within the pool. See \fBzfs\fR(1M) for a description of dataset properties and mount
-options.
+Searches for and imports all pools found.
.RE
.sp
@@ -784,9 +1179,9 @@ options.
.na
\fB\fB-R\fR \fIroot\fR\fR
.ad
-.RS 11n
+.RS 21n
.rt
-Imports pool(s) with an alternate \fIroot\fR. See the "Alternate Root Pools" section.
+Sets the "\fBcachefile\fR" property to "\fBnone\fR" and the "\fIaltroot\fR" property to "\fIroot\fR".
.RE
.RE
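For illustration (search directory and mount option hypothetical), importing every pool found under a directory with temporary read-only mounts might look like:

    # zpool import -d /export/disks -o ro -a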
@@ -795,21 +1190,58 @@ Imports pool(s) with an alternate \fIroot\fR. See the "Alternate Root Pools" sec
.ne 2
.mk
.na
-\fB\fBzpool import\fR [\fB-d\fR \fIdir\fR] [\fB-D\fR] [\fB-f\fR] [\fB-a\fR]\fR
+\fB\fBzpool import\fR [\fB-o\fR \fImntopts\fR] [ \fB-o\fR \fIproperty\fR=\fIvalue\fR] ... [\fB-d\fR \fIdir\fR | \fB-c\fR \fIcachefile\fR]
+[\fB-D\fR] [\fB-f\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR | \fIid\fR [\fInewpool\fR]\fR
.ad
.sp .6
.RS 4n
-Imports all pools found in the search directories. Identical to the previous command, except that all pools with a sufficient number of devices available are imported. Destroyed pools, pools that were previously destroyed with the "\fB-zpool destroy\fR" command,
-will not be imported unless the \fB-D\fR option is specified.
+Imports a specific pool. A pool can be identified by its name or the numeric identifier. If \fInewpool\fR is specified, the pool is imported using the name \fInewpool\fR. Otherwise, it is imported with the same name as its exported name.
+.sp
+If a device is removed from a system without running "\fBzpool export\fR" first, the device appears as potentially active. It cannot be determined if this was a failed export, or whether the device is really in use from another host. To import a pool in this state,
+the \fB-f\fR option is required.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fImntopts\fR\fR
+.ad
+.sp .6
+.RS 4n
+Comma-separated list of mount options to use when mounting datasets within the pool. See \fBzfs\fR(1M) for a description of dataset properties and mount
+options.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-o\fR \fIproperty=value\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets the specified property on the imported pool. See the "Properties" section for more information on the available pool properties.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-c\fR \fIcachefile\fR\fR
+.ad
+.sp .6
+.RS 4n
+Reads configuration from the given \fBcachefile\fR that was created with the "\fBcachefile\fR" pool property. This \fBcachefile\fR is used instead of searching for devices.
+.RE
+
.sp
.ne 2
.mk
.na
\fB\fB-d\fR \fIdir\fR\fR
.ad
-.RS 10n
-.rt
-Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times.
+.sp .6
+.RS 4n
+Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified multiple times. This option is incompatible with the \fB-c\fR option.
.RE
.sp
@@ -818,9 +1250,9 @@ Searches for devices or files in \fIdir\fR. The \fB-d\fR option can be specified
.na
\fB\fB-D\fR\fR
.ad
-.RS 10n
-.rt
-Imports destroyed pools only. The \fB-f\fR option is also required.
+.sp .6
+.RS 4n
+Imports a destroyed pool. The \fB-f\fR option is also required.
.RE
.sp
@@ -829,11 +1261,48 @@ Imports destroyed pools only. The \fB-f\fR option is also required.
.na
\fB\fB-f\fR\fR
.ad
-.RS 10n
-.rt
+.sp .6
+.RS 4n
Forces import, even if the pool appears to be potentially active.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-R\fR \fIroot\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets the "\fBcachefile\fR" property to "\fBnone\fR" and the "\fIaltroot\fR" property to "\fIroot\fR".
+.RE
+
+.RE
+
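For illustration (pool names and numeric identifier hypothetical), importing one pool by name and another by identifier under a new name might look like:

    # zpool import tank
    # zpool import 6223921996155991199 newtank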
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool export\fR [\fB-f\fR] \fIpool\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Exports the given pools from the system. All devices are marked as exported, but are still considered in use by other subsystems. The devices can be moved between systems (even those of different endianness) and imported as long as a sufficient number of devices are present.
+.sp
+Before exporting the pool, all datasets within the pool are unmounted.
+.sp
+For pools to be portable, you must give the \fBzpool\fR command whole disks, not just slices, so that \fBZFS\fR can label the disks with portable \fBEFI\fR labels. Otherwise, disk drivers on platforms of different endianness will not recognize the disks.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-f\fR\fR
+.ad
+.RS 6n
+.rt
+Forcefully unmount all datasets, using the "\fBunmount -f\fR" command.
+.RE
+
.RE
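For illustration (pool name hypothetical), forcibly exporting a pool whose datasets cannot be unmounted cleanly might look like:

    # zpool export -f tank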
.sp
@@ -856,14 +1325,14 @@ a more recent version are also displayed, although these pools will be inaccessi
.ad
.sp .6
.RS 4n
-Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supportedversions are displayed, along with an explanation of the features provided with each version.
+Displays \fBZFS\fR versions supported by the current software. The current \fBZFS\fR versions and all previous supported versions are displayed, along with an explanation of the features provided with each version.
.RE
.sp
.ne 2
.mk
.na
-\fB\fBzpool upgrade\fR [\fB-a\fR | \fIpool\fR]\fR
+\fB\fBzpool upgrade\fR [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIpool\fR ...\fR
.ad
.sp .6
.RS 4n
@@ -874,27 +1343,95 @@ Upgrades the given pool to the latest on-disk version. Once this is done, the po
.na
\fB\fB-a\fR\fR
.ad
-.RS 6n
+.RS 14n
.rt
Upgrades all pools.
.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-V\fR \fIversion\fR\fR
+.ad
+.RS 14n
+.rt
+Upgrades the pool to the specified version. If the \fB-V\fR flag is not specified, the pool is upgraded to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software.
+.RE
+
+.RE
+
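For illustration (pool name and version number hypothetical), upgrading a single pool to a specific on-disk version rather than the latest might look like:

    # zpool upgrade -V 9 tank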
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool history\fR [\fB-il\fR] [\fIpool\fR] ...\fR
+.ad
+.sp .6
+.RS 4n
+Displays the command history of the specified pools, or of all pools if no pool is specified.
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-i\fR\fR
+.ad
+.RS 6n
+.rt
+Displays internally logged \fBZFS\fR events in addition to user-initiated events.
+.RE
+
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-l\fR\fR
+.ad
+.RS 6n
+.rt
+Displays log records in long format, which, in addition to the standard format, includes the user name, the hostname, and the zone in which the operation was performed.
+.RE
+
+.RE
+
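For illustration (pool name hypothetical), showing the long-format history, including internal events, for one pool might look like:

    # zpool history -il tank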
+.sp
+.ne 2
+.mk
+.na
+\fB\fBzpool get\fR "\fIall\fR" | \fIproperty\fR[,...] \fIpool\fR ...\fR
+.ad
+.sp .6
+.RS 4n
+Retrieves the given list of properties (or all properties if "\fBall\fR" is used) for the specified storage pool(s). These properties are displayed with the following fields:
+.sp
+.in +2
+.nf
+ name Name of storage pool
+ property Property name
+ value Property value
+ source Property source, either 'default' or 'local'.
+.fi
+.in -2
+.sp
+
+See the "Properties" section for more information on the available pool properties.
.RE
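For illustration (pool name hypothetical), retrieving all properties and then a specific subset might look like:

    # zpool get all tank
    # zpool get version,cachefile tank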
.sp
.ne 2
.mk
.na
-\fB\fBzpool history\fR [\fIpool\fR] ...\fR
+\fB\fBzpool set\fR \fIproperty\fR=\fIvalue\fR \fIpool\fR \fR
.ad
.sp .6
.RS 4n
-Displays the command history of the specified pools (or all pools if no pool is specified).
+Sets the given property on the specified pool. See the "Properties" section for more information on what properties can be set and acceptable values.
.RE
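For illustration (pool name hypothetical), disabling the persistent cache file for a pool might look like:

    # zpool set cachefile=none tank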
.SH EXAMPLES
.LP
\fBExample 1 \fRCreating a RAID-Z Storage Pool
+.sp
.LP
The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR that consists of six disks.
@@ -908,6 +1445,7 @@ The following command creates a pool with a single \fBraidz\fR root \fIvdev\fR t
.LP
\fBExample 2 \fRCreating a Mirrored Storage Pool
+.sp
.LP
The following command creates a pool with two mirrors, where each mirror contains two disks.
@@ -921,6 +1459,7 @@ The following command creates a pool with two mirrors, where each mirror contain
.LP
\fBExample 3 \fRCreating a ZFS Storage Pool by Using Slices
+.sp
.LP
The following command creates an unmirrored pool using two disk slices.
@@ -934,6 +1473,7 @@ The following command creates an unmirrored pool using two disk slices.
.LP
\fBExample 4 \fRCreating a ZFS Storage Pool by Using Files
+.sp
.LP
The following command creates an unmirrored pool using files. While not recommended, a pool based on files can be useful for experimental purposes.
@@ -947,6 +1487,7 @@ The following command creates an unmirrored pool using files. While not recommen
.LP
\fBExample 5 \fRAdding a Mirror to a ZFS Storage Pool
+.sp
.LP
The following command adds two mirrored disks to the pool "\fItank\fR", assuming the pool is already made up of two-way mirrors. The additional space is immediately available to any datasets within the pool.
@@ -960,9 +1501,11 @@ The following command adds two mirrored disks to the pool "\fItank\fR", assuming
.LP
\fBExample 6 \fRListing Available ZFS Storage Pools
+.sp
.LP
The following command lists all available pools on the system. In this case, the pool \fIzion\fR is faulted due to a missing device.
+.sp
.LP
The results from this command are similar to the following:
@@ -980,6 +1523,7 @@ The results from this command are similar to the following:
.LP
\fBExample 7 \fRDestroying a ZFS Storage Pool
+.sp
.LP
The following command destroys the pool "\fItank\fR" and any datasets contained within.
@@ -993,6 +1537,7 @@ The following command destroys the pool "\fItank\fR" and any datasets contained
.LP
\fBExample 8 \fRExporting a ZFS Storage Pool
+.sp
.LP
The following command exports the devices in pool \fItank\fR so that they can be relocated or later imported.
@@ -1006,9 +1551,11 @@ The following command exports the devices in pool \fItank\fR so that they can be
.LP
\fBExample 9 \fRImporting a ZFS Storage Pool
+.sp
.LP
The following command displays available pools, and then imports the pool "tank" for use on the system.
+.sp
.LP
The results from this command are similar to the following:
@@ -1034,6 +1581,7 @@ config:
.LP
\fBExample 10 \fRUpgrading All ZFS Storage Pools to the Current Version
+.sp
.LP
The following command upgrades all ZFS Storage pools to the current version of the software.
@@ -1048,6 +1596,7 @@ This system is currently running ZFS version 2.
.LP
\fBExample 11 \fRManaging Hot Spares
+.sp
.LP
The following command creates a new pool with an available hot spare:
@@ -1059,6 +1608,7 @@ The following command creates a new pool with an available hot spare:
.in -2
.sp
+.sp
.LP
If one of the disks were to fail, the pool would be reduced to the degraded state. The failed device can be replaced using the following command:
@@ -1070,8 +1620,9 @@ If one of the disks were to fail, the pool would be reduced to the degraded stat
.in -2
.sp
+.sp
.LP
-Once the data has been resilvered, the spare is automatically removed and is made available should another device fails. The hot spare can be permanently removed from the pool using the following command:
+Once the data has been resilvered, the spare is automatically removed and is made available should another device fail. The hot spare can be permanently removed from the pool using the following command:
.sp
.in +2
@@ -1081,7 +1632,48 @@ Once the data has been resilvered, the spare is automatically removed and is mad
.in -2
.sp
+.LP
+\fBExample 12 \fRCreating a ZFS Pool with Mirrored Separate Intent Logs
+.sp
+.LP
+The following command creates a ZFS storage pool consisting of two two-way mirrors and mirrored log devices:
+
+.sp
+.in +2
+.nf
+\fB# zpool create pool mirror c0d0 c1d0 mirror c2d0 c3d0 log mirror \e
+ c4d0 c5d0\fR
+.fi
+.in -2
+.sp
+
+.LP
+\fBExample 13 \fRAdding Cache Devices to a ZFS Pool
+.sp
+.LP
+The following command adds two disks for use as cache devices to a ZFS storage pool:
+
+.sp
+.in +2
+.nf
+\fB# zpool add pool cache c2d0 c3d0\fR
+.fi
+.in -2
+.sp
+
+.sp
+.LP
+Once added, the cache devices gradually fill with content from main memory. Depending on the size of your cache devices, it could take over an hour for them to fill. Capacity and reads can be monitored using the \fBiostat\fR subcommand as follows:
+.sp
+.in +2
+.nf
+\fB# zpool iostat -v pool 5\fR
+.fi
+.in -2
+.sp
+
.SH EXIT STATUS
+.sp
.LP
The following exit values are returned:
.sp
@@ -1118,6 +1710,7 @@ Invalid command line options were specified.
.RE
.SH ATTRIBUTES
+.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -1136,5 +1729,6 @@ Interface StabilityEvolving
.TE
.SH SEE ALSO
+.sp
.LP
\fBzfs\fR(1M), \fBattributes\fR(5)
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
index f724179..6ba91b1 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_iter.c
@@ -53,6 +53,7 @@ struct zpool_list {
boolean_t zl_findall;
uu_avl_t *zl_avl;
uu_avl_pool_t *zl_pool;
+ zprop_list_t **zl_proplist;
};
/* ARGSUSED */
@@ -81,6 +82,12 @@ add_pool(zpool_handle_t *zhp, void *data)
node->zn_handle = zhp;
uu_avl_node_init(node, &node->zn_avlnode, zlp->zl_pool);
if (uu_avl_find(zlp->zl_avl, node, NULL, &idx) == NULL) {
+ if (zlp->zl_proplist &&
+ zpool_expand_proplist(zhp, zlp->zl_proplist) != 0) {
+ zpool_close(zhp);
+ free(node);
+ return (-1);
+ }
uu_avl_insert(zlp->zl_avl, node, idx);
} else {
zpool_close(zhp);
@@ -98,7 +105,7 @@ add_pool(zpool_handle_t *zhp, void *data)
* line.
*/
zpool_list_t *
-pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
+pool_list_get(int argc, char **argv, zprop_list_t **proplist, int *err)
{
zpool_list_t *zlp;
@@ -114,6 +121,8 @@ pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
UU_DEFAULT)) == NULL)
zpool_no_memory();
+ zlp->zl_proplist = proplist;
+
if (argc == 0) {
(void) zpool_iter(g_zfs, add_pool, zlp);
zlp->zl_findall = B_TRUE;
@@ -123,13 +132,12 @@ pool_list_get(int argc, char **argv, zpool_proplist_t **proplist, int *err)
for (i = 0; i < argc; i++) {
zpool_handle_t *zhp;
- if ((zhp = zpool_open_canfail(g_zfs,
- argv[i])) != NULL && add_pool(zhp, zlp) == 0) {
- if (proplist &&
- zpool_expand_proplist(zhp, proplist) != 0)
+ if (zhp = zpool_open_canfail(g_zfs, argv[i])) {
+ if (add_pool(zhp, zlp) != 0)
*err = B_TRUE;
- } else
+ } else {
*err = B_TRUE;
+ }
}
}
@@ -228,7 +236,7 @@ pool_list_count(zpool_list_t *zlp)
*/
int
for_each_pool(int argc, char **argv, boolean_t unavail,
- zpool_proplist_t **proplist, zpool_iter_f func, void *data)
+ zprop_list_t **proplist, zpool_iter_f func, void *data)
{
zpool_list_t *list;
int ret = 0;
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
index 5b1d856..2388df9 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <solaris.h>
#include <assert.h>
#include <ctype.h>
@@ -42,6 +40,8 @@
#include <strings.h>
#include <unistd.h>
#include <priv.h>
+#include <pwd.h>
+#include <zone.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
@@ -50,6 +50,7 @@
#include <libzfs.h>
#include "zpool_util.h"
+#include "zfs_comutil.h"
static int zpool_do_create(int, char **);
static int zpool_do_destroy(int, char **);
@@ -85,6 +86,8 @@ static int zpool_do_set(int, char **);
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
+
+#ifdef DEBUG
const char *
_umem_debug_init(void)
{
@@ -96,6 +99,7 @@ _umem_logging_init(void)
{
return ("fail,contents"); /* $UMEM_LOGGING setting */
}
+#endif
typedef enum {
HELP_ADD,
@@ -169,6 +173,7 @@ static zpool_command_t command_table[] = {
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
zpool_command_t *current_command;
+static char history_str[HIS_MAX_RECORD_LEN];
static const char *
get_usage(zpool_help_t idx) {
@@ -177,12 +182,13 @@ get_usage(zpool_help_t idx) {
return (gettext("\tadd [-fn] <pool> <vdev> ...\n"));
case HELP_ATTACH:
return (gettext("\tattach [-f] <pool> <device> "
- "<new_device>\n"));
+ "<new-device>\n"));
case HELP_CLEAR:
return (gettext("\tclear <pool> [device]\n"));
case HELP_CREATE:
- return (gettext("\tcreate [-fn] [-R root] [-m mountpoint] "
- "<pool> <vdev> ...\n"));
+ return (gettext("\tcreate [-fn] [-o property=value] ... \n"
+ "\t [-O file-system-property=value] ... \n"
+ "\t [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
case HELP_DESTROY:
return (gettext("\tdestroy [-f] <pool>\n"));
case HELP_DETACH:
@@ -190,17 +196,19 @@ get_usage(zpool_help_t idx) {
case HELP_EXPORT:
return (gettext("\texport [-f] <pool> ...\n"));
case HELP_HISTORY:
- return (gettext("\thistory [<pool>]\n"));
+ return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
- "\timport [-d dir] [-D] [-f] [-o opts] [-R root] -a\n"
- "\timport [-d dir] [-D] [-f] [-o opts] [-R root ]"
- " <pool | id> [newpool]\n"));
+ "\timport [-o mntopts] [-o property=value] ... \n"
+ "\t [-d dir | -c cachefile] [-D] [-f] [-R root] -a\n"
+ "\timport [-o mntopts] [-o property=value] ... \n"
+ "\t [-d dir | -c cachefile] [-D] [-f] [-R root] "
+ "<pool | id> [newpool]\n"));
case HELP_IOSTAT:
return (gettext("\tiostat [-v] [pool] ... [interval "
"[count]]\n"));
case HELP_LIST:
- return (gettext("\tlist [-H] [-o field[,field]*] "
+ return (gettext("\tlist [-H] [-o property[,...]] "
"[pool] ...\n"));
case HELP_OFFLINE:
return (gettext("\toffline [-t] <pool> <device> ...\n"));
@@ -208,9 +216,9 @@ get_usage(zpool_help_t idx) {
return (gettext("\tonline <pool> <device> ...\n"));
case HELP_REPLACE:
return (gettext("\treplace [-f] <pool> <device> "
- "[new_device]\n"));
+ "[new-device]\n"));
case HELP_REMOVE:
- return (gettext("\tremove <pool> <device>\n"));
+ return (gettext("\tremove <pool> <device> ...\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
@@ -218,9 +226,9 @@ get_usage(zpool_help_t idx) {
case HELP_UPGRADE:
return (gettext("\tupgrade\n"
"\tupgrade -v\n"
- "\tupgrade <-a | pool>\n"));
+ "\tupgrade [-V version] <-a | pool ...>\n"));
case HELP_GET:
- return (gettext("\tget <all | property[,property]...> "
+ return (gettext("\tget <\"all\" | property[,...]> "
"<pool> ...\n"));
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
@@ -230,67 +238,28 @@ get_usage(zpool_help_t idx) {
/* NOTREACHED */
}
-/*
- * Fields available for 'zpool list'.
- */
-typedef enum {
- ZPOOL_FIELD_NAME,
- ZPOOL_FIELD_SIZE,
- ZPOOL_FIELD_USED,
- ZPOOL_FIELD_AVAILABLE,
- ZPOOL_FIELD_CAPACITY,
- ZPOOL_FIELD_HEALTH,
- ZPOOL_FIELD_ROOT
-} zpool_field_t;
-
-#define MAX_FIELDS 10
-
-typedef struct column_def {
- const char *cd_title;
- size_t cd_width;
- enum {
- left_justify,
- right_justify
- } cd_justify;
-} column_def_t;
-
-static column_def_t column_table[] = {
- { "NAME", 20, left_justify },
- { "SIZE", 6, right_justify },
- { "USED", 6, right_justify },
- { "AVAIL", 6, right_justify },
- { "CAP", 5, right_justify },
- { "HEALTH", 9, left_justify },
- { "ALTROOT", 15, left_justify }
-};
-
-static char *column_subopts[] = {
- "name",
- "size",
- "used",
- "available",
- "capacity",
- "health",
- "root",
- NULL
-};
/*
* Callback routine that will print out a pool property value.
*/
-static zpool_prop_t
-print_prop_cb(zpool_prop_t prop, void *cb)
+static int
+print_prop_cb(int prop, void *cb)
{
FILE *fp = cb;
(void) fprintf(fp, "\t%-13s ", zpool_prop_to_name(prop));
+ if (zpool_prop_readonly(prop))
+ (void) fprintf(fp, " NO ");
+ else
+ (void) fprintf(fp, " YES ");
+
if (zpool_prop_values(prop) == NULL)
(void) fprintf(fp, "-\n");
else
(void) fprintf(fp, "%s\n", zpool_prop_values(prop));
- return (ZFS_PROP_CONT);
+ return (ZPROP_CONT);
}
/*
@@ -301,7 +270,6 @@ print_prop_cb(zpool_prop_t prop, void *cb)
void
usage(boolean_t requested)
{
- int i;
FILE *fp = requested ? stdout : stderr;
if (current_command == NULL) {
@@ -321,28 +289,22 @@ usage(boolean_t requested)
} else {
(void) fprintf(fp, gettext("usage:\n"));
(void) fprintf(fp, "%s", get_usage(current_command->usage));
-
- if (strcmp(current_command->name, "list") == 0) {
- (void) fprintf(fp, gettext("\nwhere 'field' is one "
- "of the following:\n\n"));
-
- for (i = 0; column_subopts[i] != NULL; i++)
- (void) fprintf(fp, "\t%s\n", column_subopts[i]);
- }
}
if (current_command != NULL &&
((strcmp(current_command->name, "set") == 0) ||
- (strcmp(current_command->name, "get") == 0))) {
+ (strcmp(current_command->name, "get") == 0) ||
+ (strcmp(current_command->name, "list") == 0))) {
(void) fprintf(fp,
gettext("\nthe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-13s %s\n\n",
- "PROPERTY", "VALUES");
+ (void) fprintf(fp, "\n\t%-13s %s %s\n\n",
+ "PROPERTY", "EDIT", "VALUES");
/* Iterate over all properties */
- (void) zpool_prop_iter(print_prop_cb, fp, B_FALSE);
+ (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE,
+ ZFS_TYPE_POOL);
}
/*
@@ -356,46 +318,9 @@ usage(boolean_t requested)
exit(requested ? 0 : 2);
}
-const char *
-state_to_health(int vs_state)
-{
- switch (vs_state) {
- case VDEV_STATE_CLOSED:
- case VDEV_STATE_CANT_OPEN:
- case VDEV_STATE_OFFLINE:
- return (dgettext(TEXT_DOMAIN, "FAULTED"));
- case VDEV_STATE_DEGRADED:
- return (dgettext(TEXT_DOMAIN, "DEGRADED"));
- case VDEV_STATE_HEALTHY:
- return (dgettext(TEXT_DOMAIN, "ONLINE"));
- }
-
- return (dgettext(TEXT_DOMAIN, "UNKNOWN"));
-}
-
-const char *
-state_to_name(vdev_stat_t *vs)
-{
- switch (vs->vs_state) {
- case VDEV_STATE_CLOSED:
- case VDEV_STATE_CANT_OPEN:
- if (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
- return (gettext("FAULTED"));
- else
- return (gettext("UNAVAIL"));
- case VDEV_STATE_OFFLINE:
- return (gettext("OFFLINE"));
- case VDEV_STATE_DEGRADED:
- return (gettext("DEGRADED"));
- case VDEV_STATE_HEALTHY:
- return (gettext("ONLINE"));
- }
-
- return (gettext("UNKNOWN"));
-}
-
void
-print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
+print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent,
+ boolean_t print_logs)
{
nvlist_t **child;
uint_t c, children;
@@ -409,13 +334,75 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
return;
for (c = 0; c < children; c++) {
+ uint64_t is_log = B_FALSE;
+
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+ if ((is_log && !print_logs) || (!is_log && print_logs))
+ continue;
+
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
- print_vdev_tree(zhp, vname, child[c], indent + 2);
+ print_vdev_tree(zhp, vname, child[c], indent + 2,
+ B_FALSE);
free(vname);
}
}
/*
+ * Add a property pair (name, string-value) into a property nvlist.
+ */
+static int
+add_prop_list(const char *propname, char *propval, nvlist_t **props,
+ boolean_t poolprop)
+{
+ zpool_prop_t prop = ZPROP_INVAL;
+ zfs_prop_t fprop;
+ nvlist_t *proplist;
+ const char *normnm;
+ char *strval;
+
+ if (*props == NULL &&
+ nvlist_alloc(props, NV_UNIQUE_NAME, 0) != 0) {
+ (void) fprintf(stderr,
+ gettext("internal error: out of memory\n"));
+ return (1);
+ }
+
+ proplist = *props;
+
+ if (poolprop) {
+ if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
+ (void) fprintf(stderr, gettext("property '%s' is "
+ "not a valid pool property\n"), propname);
+ return (2);
+ }
+ normnm = zpool_prop_to_name(prop);
+ } else {
+ if ((fprop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
+ (void) fprintf(stderr, gettext("property '%s' is "
+ "not a valid file system property\n"), propname);
+ return (2);
+ }
+ normnm = zfs_prop_to_name(fprop);
+ }
+
+ if (nvlist_lookup_string(proplist, normnm, &strval) == 0 &&
+ prop != ZPOOL_PROP_CACHEFILE) {
+ (void) fprintf(stderr, gettext("property '%s' "
+ "specified multiple times\n"), propname);
+ return (2);
+ }
+
+ if (nvlist_add_string(proplist, normnm, propval) != 0) {
+ (void) fprintf(stderr, gettext("internal "
+ "error: out of memory\n"));
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
* zpool add [-fn] <pool> <vdev> ...
*
* -f Force addition of devices, even if they appear in use
@@ -483,7 +470,8 @@ zpool_do_add(int argc, char **argv)
}
/* pass off to get_vdev_spec for processing */
- nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv);
+ nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun,
+ argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
return (1);
@@ -498,16 +486,21 @@ zpool_do_add(int argc, char **argv)
(void) printf(gettext("would update '%s' to the following "
"configuration:\n"), zpool_get_name(zhp));
- print_vdev_tree(zhp, poolname, poolnvroot, 0);
- print_vdev_tree(zhp, NULL, nvroot, 0);
+ /* print original main pool and new tree */
+ print_vdev_tree(zhp, poolname, poolnvroot, 0, B_FALSE);
+ print_vdev_tree(zhp, NULL, nvroot, 0, B_FALSE);
+
+ /* Do the same for the logs */
+ if (num_logs(poolnvroot) > 0) {
+ print_vdev_tree(zhp, "logs", poolnvroot, 0, B_TRUE);
+ print_vdev_tree(zhp, NULL, nvroot, 0, B_TRUE);
+ } else if (num_logs(nvroot) > 0) {
+ print_vdev_tree(zhp, "logs", nvroot, 0, B_TRUE);
+ }
ret = 0;
} else {
ret = (zpool_add(zhp, nvroot) != 0);
- if (!ret) {
- zpool_log_history(g_zfs, argc + 1 + optind,
- argv - 1 - optind, poolname, B_TRUE, B_FALSE);
- }
}
nvlist_free(nvroot);
@@ -517,17 +510,17 @@ zpool_do_add(int argc, char **argv)
}
/*
- * zpool remove <pool> <vdev>
+ * zpool remove <pool> <vdev> ...
*
* Removes the given vdev from the pool. Currently, this only supports removing
- * spares from the pool. Eventually, we'll want to support removing leaf vdevs
- * (as an alias for 'detach') as well as toplevel vdevs.
+ * spares and cache devices from the pool. Eventually, we'll want to support
+ * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs.
*/
int
zpool_do_remove(int argc, char **argv)
{
char *poolname;
- int ret;
+ int i, ret = 0;
zpool_handle_t *zhp;
argc--;
@@ -548,17 +541,18 @@ zpool_do_remove(int argc, char **argv)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
- ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
- if (!ret) {
- zpool_log_history(g_zfs, ++argc, --argv, poolname, B_TRUE,
- B_FALSE);
+ for (i = 1; i < argc; i++) {
+ if (zpool_vdev_remove(zhp, argv[i]) != 0)
+ ret = 1;
}
return (ret);
}
/*
- * zpool create [-fn] [-R root] [-m mountpoint] <pool> <dev> ...
+ * zpool create [-fn] [-o property=value] ...
+ * [-O file-system-property=value] ...
+ * [-R root] [-m mountpoint] <pool> <dev> ...
*
* -f Force creation, even if devices appear in use
* -n Do not create the pool, but display the resulting layout if it
@@ -566,6 +560,8 @@ zpool_do_remove(int argc, char **argv)
* -R Create a pool under an alternate root
* -m Set default mountpoint for the root dataset. By default it's
* '/<pool>'
+ * -o Set property=value.
+ * -O Set fsproperty=value in the pool's root file system
*
* Creates the named pool according to the given vdev specification. The
* bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once
@@ -578,16 +574,17 @@ zpool_do_create(int argc, char **argv)
boolean_t force = B_FALSE;
boolean_t dryrun = B_FALSE;
int c;
- nvlist_t *nvroot;
+ nvlist_t *nvroot = NULL;
char *poolname;
- int ret;
+ int ret = 1;
char *altroot = NULL;
char *mountpoint = NULL;
- nvlist_t **child;
- uint_t children;
+ nvlist_t *fsprops = NULL;
+ nvlist_t *props = NULL;
+ char *propval;
/* check options */
- while ((c = getopt(argc, argv, ":fnR:m:")) != -1) {
+ while ((c = getopt(argc, argv, ":fnR:m:o:O:")) != -1) {
switch (c) {
case 'f':
force = B_TRUE;
@@ -597,19 +594,52 @@ zpool_do_create(int argc, char **argv)
break;
case 'R':
altroot = optarg;
+ if (add_prop_list(zpool_prop_to_name(
+ ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE))
+ goto errout;
+ if (nvlist_lookup_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
+ &propval) == 0)
+ break;
+ if (add_prop_list(zpool_prop_to_name(
+ ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
+ goto errout;
break;
case 'm':
mountpoint = optarg;
break;
+ case 'o':
+ if ((propval = strchr(optarg, '=')) == NULL) {
+ (void) fprintf(stderr, gettext("missing "
+ "'=' for -o option\n"));
+ goto errout;
+ }
+ *propval = '\0';
+ propval++;
+
+ if (add_prop_list(optarg, propval, &props, B_TRUE))
+ goto errout;
+ break;
+ case 'O':
+ if ((propval = strchr(optarg, '=')) == NULL) {
+ (void) fprintf(stderr, gettext("missing "
+ "'=' for -O option\n"));
+ goto errout;
+ }
+ *propval = '\0';
+ propval++;
+
+ if (add_prop_list(optarg, propval, &fsprops, B_FALSE))
+ goto errout;
+ break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
- usage(B_FALSE);
- break;
+ goto badusage;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
- usage(B_FALSE);
+ goto badusage;
}
}
@@ -619,11 +649,11 @@ zpool_do_create(int argc, char **argv)
/* get pool name and check number of arguments */
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name argument\n"));
- usage(B_FALSE);
+ goto badusage;
}
if (argc < 2) {
(void) fprintf(stderr, gettext("missing vdev specification\n"));
- usage(B_FALSE);
+ goto badusage;
}
poolname = argv[0];
@@ -637,31 +667,28 @@ zpool_do_create(int argc, char **argv)
"character '/' in pool name\n"), poolname);
(void) fprintf(stderr, gettext("use 'zfs create' to "
"create a dataset\n"));
- return (1);
+ goto errout;
}
/* pass off to get_vdev_spec for bulk processing */
- nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1,
- argv + 1);
+ nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun,
+ argc - 1, argv + 1);
if (nvroot == NULL)
- return (1);
+ goto errout;
/* make_root_vdev() allows 0 toplevel children if there are spares */
- verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- if (children == 0) {
+ if (!zfs_allocatable_devs(nvroot)) {
(void) fprintf(stderr, gettext("invalid vdev "
"specification: at least one toplevel vdev must be "
"specified\n"));
- return (1);
+ goto errout;
}
if (altroot != NULL && altroot[0] != '/') {
(void) fprintf(stderr, gettext("invalid alternate root '%s': "
"must be an absolute path\n"), altroot);
- nvlist_free(nvroot);
- return (1);
+ goto errout;
}
/*
@@ -672,14 +699,13 @@ zpool_do_create(int argc, char **argv)
(strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
char buf[MAXPATHLEN];
- struct stat64 statbuf;
+ DIR *dirp;
if (mountpoint && mountpoint[0] != '/') {
(void) fprintf(stderr, gettext("invalid mountpoint "
"'%s': must be an absolute path, 'legacy', or "
"'none'\n"), mountpoint);
- nvlist_free(nvroot);
- return (1);
+ goto errout;
}
if (mountpoint == NULL) {
@@ -698,23 +724,30 @@ zpool_do_create(int argc, char **argv)
mountpoint);
}
- if (stat64(buf, &statbuf) == 0 &&
- statbuf.st_nlink != 2) {
- if (mountpoint == NULL)
- (void) fprintf(stderr, gettext("default "
- "mountpoint '%s' exists and is not "
- "empty\n"), buf);
- else
- (void) fprintf(stderr, gettext("mountpoint "
- "'%s' exists and is not empty\n"), buf);
+ if ((dirp = opendir(buf)) == NULL && errno != ENOENT) {
+ (void) fprintf(stderr, gettext("mountpoint '%s' : "
+ "%s\n"), buf, strerror(errno));
(void) fprintf(stderr, gettext("use '-m' "
"option to provide a different default\n"));
- nvlist_free(nvroot);
- return (1);
+ goto errout;
+ } else if (dirp) {
+ int count = 0;
+
+ while (count < 3 && readdir(dirp) != NULL)
+ count++;
+ (void) closedir(dirp);
+
+ if (count > 2) {
+ (void) fprintf(stderr, gettext("mountpoint "
+ "'%s' exists and is not empty\n"), buf);
+ (void) fprintf(stderr, gettext("use '-m' "
+ "option to provide a "
+ "different default\n"));
+ goto errout;
+ }
}
}
-
if (dryrun) {
/*
* For a dry run invocation, print out a basic message and run
@@ -724,15 +757,17 @@ zpool_do_create(int argc, char **argv)
(void) printf(gettext("would create '%s' with the "
"following layout:\n\n"), poolname);
- print_vdev_tree(NULL, poolname, nvroot, 0);
+ print_vdev_tree(NULL, poolname, nvroot, 0, B_FALSE);
+ if (num_logs(nvroot) > 0)
+ print_vdev_tree(NULL, "logs", nvroot, 0, B_TRUE);
ret = 0;
} else {
- ret = 1;
/*
* Hand off to libzfs.
*/
- if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) {
+ if (zpool_create(g_zfs, poolname,
+ nvroot, props, fsprops) == 0) {
zfs_handle_t *pool = zfs_open(g_zfs, poolname,
ZFS_TYPE_FILESYSTEM);
if (pool != NULL) {
@@ -742,20 +777,25 @@ zpool_do_create(int argc, char **argv)
ZFS_PROP_MOUNTPOINT),
mountpoint) == 0);
if (zfs_mount(pool, NULL, 0) == 0)
- ret = zfs_share_nfs(pool);
+ ret = zfs_shareall(pool);
zfs_close(pool);
}
- zpool_log_history(g_zfs, argc + optind, argv - optind,
- poolname, B_TRUE, B_TRUE);
} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
(void) fprintf(stderr, gettext("pool name may have "
"been omitted\n"));
}
}
+errout:
nvlist_free(nvroot);
-
+ nvlist_free(fsprops);
+ nvlist_free(props);
return (ret);
+badusage:
+ nvlist_free(fsprops);
+ nvlist_free(props);
+ usage(B_FALSE);
+ return (2);
}
/*
@@ -819,9 +859,6 @@ zpool_do_destroy(int argc, char **argv)
return (1);
}
- zpool_log_history(g_zfs, argc + optind, argv - optind, pool, B_TRUE,
- B_FALSE);
-
ret = (zpool_destroy(zhp) != 0);
zpool_close(zhp);
@@ -882,10 +919,7 @@ zpool_do_export(int argc, char **argv)
continue;
}
- zpool_log_history(g_zfs, argc + optind, argv - optind, argv[i],
- B_TRUE, B_FALSE);
-
- if (zpool_export(zhp) != 0)
+ if (zpool_export(zhp, force) != 0)
ret = 1;
zpool_close(zhp);
@@ -919,6 +953,14 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
max = ret;
}
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ if ((ret = max_width(zhp, child[c], depth + 2,
+ max)) > max)
+ max = ret;
+ }
+
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++)
@@ -937,7 +979,8 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
* pool, printing out the name and status for each one.
*/
void
-print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
+print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
+ boolean_t print_logs)
{
nvlist_t **child;
uint_t c, children;
@@ -952,9 +995,10 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
(uint64_t **)&vs, &c) == 0);
(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
+ (void) printf(" %s", zpool_state_to_name(vs->vs_state, vs->vs_aux));
if (vs->vs_aux != 0) {
- (void) printf(" %-8s ", state_to_name(vs));
+ (void) printf(" ");
switch (vs->vs_aux) {
case VDEV_AUX_OPEN_FAILED:
@@ -973,12 +1017,14 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
(void) printf(gettext("newer version"));
break;
+ case VDEV_AUX_ERR_EXCEEDED:
+ (void) printf(gettext("too many errors"));
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
}
- } else {
- (void) printf(" %s", state_to_name(vs));
}
(void) printf("\n");
@@ -987,21 +1033,37 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
return;
for (c = 0; c < children; c++) {
+ uint64_t is_log = B_FALSE;
+
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+ if ((is_log && !print_logs) || (!is_log && print_logs))
+ continue;
+
vname = zpool_vdev_name(g_zfs, NULL, child[c]);
print_import_config(vname, child[c],
- namewidth, depth + 2);
+ namewidth, depth + 2, B_FALSE);
free(vname);
}
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
- &child, &children) != 0)
- return;
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0) {
+ (void) printf(gettext("\tcache\n"));
+ for (c = 0; c < children; c++) {
+ vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ (void) printf("\t %s\n", vname);
+ free(vname);
+ }
+ }
- (void) printf(gettext("\tspares\n"));
- for (c = 0; c < children; c++) {
- vname = zpool_vdev_name(g_zfs, NULL, child[c]);
- (void) printf("\t %s\n", vname);
- free(vname);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ (void) printf(gettext("\tspares\n"));
+ for (c = 0; c < children; c++) {
+ vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+ (void) printf("\t %s\n", vname);
+ free(vname);
+ }
}
}
@@ -1033,7 +1095,7 @@ show_import(nvlist_t *config)
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &vsc) == 0);
- health = state_to_health(vs->vs_state);
+ health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
reason = zpool_import_status(config, &msgid);
@@ -1086,6 +1148,18 @@ show_import(nvlist_t *config)
(void) printf(gettext("status: The pool was last accessed by "
"another system.\n"));
break;
+
+ case ZPOOL_STATUS_FAULTED_DEV_R:
+ case ZPOOL_STATUS_FAULTED_DEV_NR:
+ (void) printf(gettext("status: One or more devices are "
+ "faulted.\n"));
+ break;
+
+ case ZPOOL_STATUS_BAD_LOG:
+ (void) printf(gettext("status: An intent log record cannot be "
+ "read.\n"));
+ break;
+
default:
/*
* No other status can be seen when importing pools.
@@ -1147,7 +1221,7 @@ show_import(nvlist_t *config)
"but can be imported using the '-Df' flags.\n"));
else if (pool_state != POOL_STATE_EXPORTED)
(void) printf(gettext("\tThe pool may be active on "
- "on another system, but can be imported using\n\t"
+ "another system, but can be imported using\n\t"
"the '-f' flag.\n"));
}
@@ -1160,7 +1234,12 @@ show_import(nvlist_t *config)
namewidth = max_width(NULL, nvroot, 0, 0);
if (namewidth < 10)
namewidth = 10;
- print_import_config(name, nvroot, namewidth, 0);
+
+ print_import_config(name, nvroot, namewidth, 0, B_FALSE);
+ if (num_logs(nvroot) > 0) {
+ (void) printf(gettext("\tlogs\n"));
+ print_import_config(name, nvroot, namewidth, 0, B_TRUE);
+ }
if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
(void) printf(gettext("\n\tAdditional devices are known to "
@@ -1171,17 +1250,18 @@ show_import(nvlist_t *config)
/*
* Perform the import for the given configuration. This passes the heavy
- * lifting off to zpool_import(), and then mounts the datasets contained within
- * the pool.
+ * lifting off to zpool_import_props(), and then mounts the datasets contained
+ * within the pool.
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
- const char *altroot, int force, int argc, char **argv)
+ int force, nvlist_t *props, boolean_t allowfaulted)
{
zpool_handle_t *zhp;
char *name;
uint64_t state;
uint64_t version;
+ int error = 0;
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
&name) == 0);
@@ -1190,7 +1270,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
ZPOOL_CONFIG_POOL_STATE, &state) == 0);
verify(nvlist_lookup_uint64(config,
ZPOOL_CONFIG_VERSION, &version) == 0);
- if (version > ZFS_VERSION) {
+ if (version > SPA_VERSION) {
(void) fprintf(stderr, gettext("cannot import '%s': pool "
"is formatted using a newer ZFS version\n"), name);
return (1);
@@ -1228,15 +1308,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
}
- if (zpool_import(g_zfs, config, newname, altroot) != 0)
+ if (zpool_import_props(g_zfs, config, newname, props,
+ allowfaulted) != 0)
return (1);
if (newname != NULL)
name = (char *)newname;
- zpool_log_history(g_zfs, argc, argv, name, B_TRUE, B_FALSE);
-
- verify((zhp = zpool_open(g_zfs, name)) != NULL);
+ verify((zhp = zpool_open_canfail(g_zfs, name)) != NULL);
if (zpool_enable_datasets(zhp, mntopts, 0) != 0) {
zpool_close(zhp);
@@ -1244,13 +1323,18 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
zpool_close(zhp);
- return (0);
+ return (error);
}
/*
* zpool import [-d dir] [-D]
- * import [-R root] [-D] [-d dir] [-f] -a
- * import [-R root] [-D] [-d dir] [-f] <pool | id> [newpool]
+ * import [-o mntopts] [-o prop=value] ... [-R root] [-D]
+ * [-d dir | -c cachefile] [-f] -a
+ * import [-o mntopts] [-o prop=value] ... [-R root] [-D]
+ * [-d dir | -c cachefile] [-f] <pool | id> [newpool]
+ *
+ * -c Read pool information from a cachefile instead of searching
+ * devices.
*
* -d Scan in a specific directory, other than /dev/dsk. More than
* one directory can be specified using multiple '-d' options.
@@ -1264,8 +1348,15 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
*
* -f Force import, even if it appears that the pool is active.
*
+ * -F Import even in the presence of faulted vdevs. This is an
+ * intentionally undocumented option for testing purposes, and
+ * treats the pool configuration as complete, leaving any bad
+ * vdevs in the FAULTED state.
+ *
* -a Import all pools found.
*
+ * -o Set property=value and/or temporary mount options (without '=').
+ *
* The import command scans for pools to import, and import pools based on pool
* name and GUID. The pool can also be renamed as part of the import process.
*/
@@ -1276,26 +1367,32 @@ zpool_do_import(int argc, char **argv)
int nsearch = 0;
int c;
int err;
- nvlist_t *pools;
+ nvlist_t *pools = NULL;
boolean_t do_all = B_FALSE;
boolean_t do_destroyed = B_FALSE;
- char *altroot = NULL;
char *mntopts = NULL;
boolean_t do_force = B_FALSE;
nvpair_t *elem;
nvlist_t *config;
- uint64_t searchguid;
- char *searchname;
+ uint64_t searchguid = 0;
+ char *searchname = NULL;
+ char *propval;
nvlist_t *found_config;
+ nvlist_t *props = NULL;
boolean_t first;
+ boolean_t allow_faulted = B_FALSE;
uint64_t pool_state;
+ char *cachefile = NULL;
/* check options */
- while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) {
+ while ((c = getopt(argc, argv, ":ac:d:DfFo:p:R:")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
break;
+ case 'c':
+ cachefile = optarg;
+ break;
case 'd':
if (searchdirs == NULL) {
searchdirs = safe_malloc(sizeof (char *));
@@ -1315,11 +1412,31 @@ zpool_do_import(int argc, char **argv)
case 'f':
do_force = B_TRUE;
break;
+ case 'F':
+ allow_faulted = B_TRUE;
+ break;
case 'o':
- mntopts = optarg;
+ if ((propval = strchr(optarg, '=')) != NULL) {
+ *propval = '\0';
+ propval++;
+ if (add_prop_list(optarg, propval,
+ &props, B_TRUE))
+ goto error;
+ } else {
+ mntopts = optarg;
+ }
break;
case 'R':
- altroot = optarg;
+ if (add_prop_list(zpool_prop_to_name(
+ ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE))
+ goto error;
+ if (nvlist_lookup_string(props,
+ zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
+ &propval) == 0)
+ break;
+ if (add_prop_list(zpool_prop_to_name(
+ ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE))
+ goto error;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -1336,9 +1453,14 @@ zpool_do_import(int argc, char **argv)
argc -= optind;
argv += optind;
+ if (cachefile && nsearch != 0) {
+ (void) fprintf(stderr, gettext("-c is incompatible with -d\n"));
+ usage(B_FALSE);
+ }
+
if (searchdirs == NULL) {
searchdirs = safe_malloc(sizeof (char *));
- searchdirs[0] = "/dev";
+ searchdirs[0] = "/dev/dsk";
nsearch = 1;
}
@@ -1367,13 +1489,7 @@ zpool_do_import(int argc, char **argv)
}
}
- if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) {
- free(searchdirs);
- return (1);
- }
-
/*
- * We now have a list of all available pools in the given directories.
* Depending on the arguments given, we do one of the following:
*
* <none> Iterate through all pools and display information about
@@ -1393,11 +1509,38 @@ zpool_do_import(int argc, char **argv)
searchguid = strtoull(argv[0], &endptr, 10);
if (errno != 0 || *endptr != '\0')
searchname = argv[0];
- else
- searchname = NULL;
found_config = NULL;
}
+ if (cachefile) {
+ pools = zpool_find_import_cached(g_zfs, cachefile, searchname,
+ searchguid);
+ } else if (searchname != NULL) {
+ pools = zpool_find_import_byname(g_zfs, nsearch, searchdirs,
+ searchname);
+ } else {
+ /*
+ * It's OK to search by guid even if searchguid is 0.
+ */
+ pools = zpool_find_import_byguid(g_zfs, nsearch, searchdirs,
+ searchguid);
+ }
+
+ if (pools == NULL) {
+ if (argc != 0) {
+ (void) fprintf(stderr, gettext("cannot import '%s': "
+ "no such pool available\n"), argv[0]);
+ }
+ free(searchdirs);
+ return (1);
+ }
+
+ /*
+ * At this point we have a list of import candidate configs. Even if
+ * we were searching by pool name or guid, we still need to
+ * post-process the list to deal with pool state and possible
+ * duplicate names.
+ */
err = 0;
elem = NULL;
first = B_TRUE;
@@ -1420,8 +1563,7 @@ zpool_do_import(int argc, char **argv)
if (do_all)
err |= do_import(config, NULL, mntopts,
- altroot, do_force, argc + optind,
- argv - optind);
+ do_force, props, allow_faulted);
else
show_import(config);
} else if (searchname != NULL) {
@@ -1469,8 +1611,7 @@ zpool_do_import(int argc, char **argv)
err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, altroot, do_force, argc + optind,
- argv - optind);
+ argv[1], mntopts, do_force, props, allow_faulted);
}
}
@@ -1482,6 +1623,8 @@ zpool_do_import(int argc, char **argv)
(void) fprintf(stderr,
gettext("no pools available to import\n"));
+error:
+ nvlist_free(props);
nvlist_free(pools);
free(searchdirs);
@@ -1518,7 +1661,7 @@ print_iostat_header(iostat_cbdata_t *cb)
/*
* Display a single statistic.
*/
-void
+static void
print_one_stat(uint64_t value)
{
char buf[64];
@@ -1606,6 +1749,28 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
newchild[c], cb, depth + 2);
free(vname);
}
+
+ /*
+ * Include level 2 ARC devices in iostat output
+ */
+ if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE,
+ &newchild, &children) != 0)
+ return;
+
+ if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE,
+ &oldchild, &c) != 0)
+ return;
+
+ if (children > 0) {
+ (void) printf("%-*s - - - - - "
+ "-\n", cb->cb_namewidth, "cache");
+ for (c = 0; c < children; c++) {
+ vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+ print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
+ newchild[c], cb, depth + 2);
+ free(vname);
+ }
+ }
}
static int
@@ -1870,160 +2035,122 @@ zpool_do_iostat(int argc, char **argv)
typedef struct list_cbdata {
boolean_t cb_scripted;
boolean_t cb_first;
- int cb_fields[MAX_FIELDS];
- int cb_fieldcount;
+ zprop_list_t *cb_proplist;
} list_cbdata_t;
/*
* Given a list of columns to display, output appropriate headers for each one.
*/
-void
-print_header(int *fields, size_t count)
+static void
+print_header(zprop_list_t *pl)
{
- int i;
- column_def_t *col;
- const char *fmt;
+ const char *header;
+ boolean_t first = B_TRUE;
+ boolean_t right_justify;
- for (i = 0; i < count; i++) {
- col = &column_table[fields[i]];
- if (i != 0)
+ for (; pl != NULL; pl = pl->pl_next) {
+ if (pl->pl_prop == ZPROP_INVAL)
+ continue;
+
+ if (!first)
(void) printf(" ");
- if (col->cd_justify == left_justify)
- fmt = "%-*s";
else
- fmt = "%*s";
+ first = B_FALSE;
+
+ header = zpool_prop_column_name(pl->pl_prop);
+ right_justify = zpool_prop_align_right(pl->pl_prop);
- (void) printf(fmt, i == count - 1 ? strlen(col->cd_title) :
- col->cd_width, col->cd_title);
+ if (pl->pl_next == NULL && !right_justify)
+ (void) printf("%s", header);
+ else if (right_justify)
+ (void) printf("%*s", pl->pl_width, header);
+ else
+ (void) printf("%-*s", pl->pl_width, header);
}
(void) printf("\n");
}
-int
-list_callback(zpool_handle_t *zhp, void *data)
+/*
+ * Given a pool and a list of properties, print out all the properties according
+ * to the described layout.
+ */
+static void
+print_pool(zpool_handle_t *zhp, zprop_list_t *pl, int scripted)
{
- list_cbdata_t *cbp = data;
- nvlist_t *config;
- int i;
- char buf[ZPOOL_MAXNAMELEN];
- uint64_t total;
- uint64_t used;
- const char *fmt;
- column_def_t *col;
-
- if (cbp->cb_first) {
- if (!cbp->cb_scripted)
- print_header(cbp->cb_fields, cbp->cb_fieldcount);
- cbp->cb_first = B_FALSE;
- }
-
- if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
- config = NULL;
- } else {
- config = zpool_get_config(zhp, NULL);
- total = zpool_get_space_total(zhp);
- used = zpool_get_space_used(zhp);
- }
-
- for (i = 0; i < cbp->cb_fieldcount; i++) {
- if (i != 0) {
- if (cbp->cb_scripted)
+ boolean_t first = B_TRUE;
+ char property[ZPOOL_MAXPROPLEN];
+ char *propstr;
+ boolean_t right_justify;
+ int width;
+
+ for (; pl != NULL; pl = pl->pl_next) {
+ if (!first) {
+ if (scripted)
(void) printf("\t");
else
(void) printf(" ");
+ } else {
+ first = B_FALSE;
}
- col = &column_table[cbp->cb_fields[i]];
-
- switch (cbp->cb_fields[i]) {
- case ZPOOL_FIELD_NAME:
- (void) strlcpy(buf, zpool_get_name(zhp), sizeof (buf));
- break;
-
- case ZPOOL_FIELD_SIZE:
- if (config == NULL)
- (void) strlcpy(buf, "-", sizeof (buf));
- else
- zfs_nicenum(total, buf, sizeof (buf));
- break;
-
- case ZPOOL_FIELD_USED:
- if (config == NULL)
- (void) strlcpy(buf, "-", sizeof (buf));
+ right_justify = B_FALSE;
+ if (pl->pl_prop != ZPROP_INVAL) {
+ if (zpool_get_prop(zhp, pl->pl_prop, property,
+ sizeof (property), NULL) != 0)
+ propstr = "-";
else
- zfs_nicenum(used, buf, sizeof (buf));
- break;
+ propstr = property;
- case ZPOOL_FIELD_AVAILABLE:
- if (config == NULL)
- (void) strlcpy(buf, "-", sizeof (buf));
- else
- zfs_nicenum(total - used, buf, sizeof (buf));
- break;
+ right_justify = zpool_prop_align_right(pl->pl_prop);
+ } else {
+ propstr = "-";
+ }
- case ZPOOL_FIELD_CAPACITY:
- if (config == NULL) {
- (void) strlcpy(buf, "-", sizeof (buf));
- } else {
- uint64_t capacity = (total == 0 ? 0 :
- (used * 100 / total));
- (void) snprintf(buf, sizeof (buf), "%llu%%",
- (u_longlong_t)capacity);
- }
- break;
+ width = pl->pl_width;
- case ZPOOL_FIELD_HEALTH:
- if (config == NULL) {
- (void) strlcpy(buf, "FAULTED", sizeof (buf));
- } else {
- nvlist_t *nvroot;
- vdev_stat_t *vs;
- uint_t vsc;
-
- verify(nvlist_lookup_nvlist(config,
- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
- verify(nvlist_lookup_uint64_array(nvroot,
- ZPOOL_CONFIG_STATS, (uint64_t **)&vs,
- &vsc) == 0);
- (void) strlcpy(buf, state_to_name(vs),
- sizeof (buf));
- }
- break;
+ /*
+ * If this is being called in scripted mode, or if this is the
+ * last column and it is left-justified, don't include a width
+ * format specifier.
+ */
+ if (scripted || (pl->pl_next == NULL && !right_justify))
+ (void) printf("%s", propstr);
+ else if (right_justify)
+ (void) printf("%*s", width, propstr);
+ else
+ (void) printf("%-*s", width, propstr);
+ }
- case ZPOOL_FIELD_ROOT:
- if (config == NULL)
- (void) strlcpy(buf, "-", sizeof (buf));
- else if (zpool_get_root(zhp, buf, sizeof (buf)) != 0)
- (void) strlcpy(buf, "-", sizeof (buf));
- break;
- }
+ (void) printf("\n");
+}
- if (cbp->cb_scripted)
- (void) printf("%s", buf);
- else {
- if (col->cd_justify == left_justify)
- fmt = "%-*s";
- else
- fmt = "%*s";
+/*
+ * Generic callback function to list a pool.
+ */
+int
+list_callback(zpool_handle_t *zhp, void *data)
+{
+ list_cbdata_t *cbp = data;
- (void) printf(fmt, i == cbp->cb_fieldcount - 1 ?
- strlen(buf) : col->cd_width, buf);
- }
+ if (cbp->cb_first) {
+ if (!cbp->cb_scripted)
+ print_header(cbp->cb_proplist);
+ cbp->cb_first = B_FALSE;
}
- (void) printf("\n");
+ print_pool(zhp, cbp->cb_proplist, cbp->cb_scripted);
return (0);
}
/*
- * zpool list [-H] [-o field[,field]*] [pool] ...
+ * zpool list [-H] [-o prop[,prop]*] [pool] ...
*
- * -H Scripted mode. Don't display headers, and separate fields by
- * a single tab.
- * -o List of fields to display. Defaults to all fields, or
- * "name,size,used,available,capacity,health,root"
+ * -H Scripted mode. Don't display headers, and separate properties
+ * by a single tab.
+ * -o List of properties to display. Defaults to
+ * "name,size,used,available,capacity,health,altroot"
*
* List all pools in the system, whether or not they're healthy. Output space
* statistics for each one, as well as health status summary.
@@ -2034,10 +2161,9 @@ zpool_do_list(int argc, char **argv)
int c;
int ret;
list_cbdata_t cb = { 0 };
- static char default_fields[] =
- "name,size,used,available,capacity,health,root";
- char *fields = default_fields;
- char *value;
+ static char default_props[] =
+ "name,size,used,available,capacity,health,altroot";
+ char *props = default_props;
/* check options */
while ((c = getopt(argc, argv, ":Ho:")) != -1) {
@@ -2046,7 +2172,7 @@ zpool_do_list(int argc, char **argv)
cb.cb_scripted = B_TRUE;
break;
case 'o':
- fields = optarg;
+ props = optarg;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
@@ -2063,29 +2189,17 @@ zpool_do_list(int argc, char **argv)
argc -= optind;
argv += optind;
- while (*fields != '\0') {
- if (cb.cb_fieldcount == MAX_FIELDS) {
- (void) fprintf(stderr, gettext("too many "
- "properties given to -o option\n"));
- usage(B_FALSE);
- }
-
- if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields,
- column_subopts, &value)) == -1) {
- (void) fprintf(stderr, gettext("invalid property "
- "'%s'\n"), value);
- usage(B_FALSE);
- }
-
- cb.cb_fieldcount++;
- }
-
+ if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
+ usage(B_FALSE);
cb.cb_first = B_TRUE;
- ret = for_each_pool(argc, argv, B_TRUE, NULL, list_callback, &cb);
+ ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
+ list_callback, &cb);
- if (argc == 0 && cb.cb_first) {
+ zprop_free_list(cb.cb_proplist);
+
+ if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
(void) printf(gettext("no pools available\n"));
return (0);
}
@@ -2128,10 +2242,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
nvlist_t *nvroot;
char *poolname, *old_disk, *new_disk;
zpool_handle_t *zhp;
- nvlist_t *config;
int ret;
- int log_argc;
- char **log_argv;
/* check options */
while ((c = getopt(argc, argv, "f")) != -1) {
@@ -2146,8 +2257,6 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
}
}
- log_argc = argc;
- log_argv = argv;
argc -= optind;
argv += optind;
@@ -2190,14 +2299,15 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
- if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+ if (zpool_get_config(zhp, NULL) == NULL) {
(void) fprintf(stderr, gettext("pool '%s' is unavailable\n"),
poolname);
zpool_close(zhp);
return (1);
}
- nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv);
+ nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE,
+ argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
return (1);
@@ -2205,11 +2315,6 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
- if (!ret) {
- zpool_log_history(g_zfs, log_argc, log_argv, poolname, B_TRUE,
- B_FALSE);
- }
-
nvlist_free(nvroot);
zpool_close(zhp);
@@ -2299,10 +2404,6 @@ zpool_do_detach(int argc, char **argv)
ret = zpool_vdev_detach(zhp, path);
- if (!ret) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
- B_TRUE, B_FALSE);
- }
zpool_close(zhp);
return (ret);
@@ -2311,7 +2412,6 @@ zpool_do_detach(int argc, char **argv)
/*
* zpool online <pool> <device> ...
*/
-/* ARGSUSED */
int
zpool_do_online(int argc, char **argv)
{
@@ -2319,6 +2419,7 @@ zpool_do_online(int argc, char **argv)
char *poolname;
zpool_handle_t *zhp;
int ret = 0;
+ vdev_state_t newstate;
/* check options */
while ((c = getopt(argc, argv, "t")) != -1) {
@@ -2349,17 +2450,26 @@ zpool_do_online(int argc, char **argv)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
- for (i = 1; i < argc; i++)
- if (zpool_vdev_online(zhp, argv[i]) == 0)
- (void) printf(gettext("Bringing device %s online\n"),
- argv[i]);
- else
+ for (i = 1; i < argc; i++) {
+ if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) {
+ if (newstate != VDEV_STATE_HEALTHY) {
+ (void) printf(gettext("warning: device '%s' "
+ "onlined, but remains in faulted state\n"),
+ argv[i]);
+ if (newstate == VDEV_STATE_FAULTED)
+ (void) printf(gettext("use 'zpool "
+ "clear' to restore a faulted "
+ "device\n"));
+ else
+ (void) printf(gettext("use 'zpool "
+ "replace' to replace devices "
+ "that are no longer present\n"));
+ }
+ } else {
ret = 1;
-
- if (!ret) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
- B_TRUE, B_FALSE);
+ }
}
+
zpool_close(zhp);
return (ret);
@@ -2417,17 +2527,11 @@ zpool_do_offline(int argc, char **argv)
if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
return (1);
- for (i = 1; i < argc; i++)
- if (zpool_vdev_offline(zhp, argv[i], istmp) == 0)
- (void) printf(gettext("Bringing device %s offline\n"),
- argv[i]);
- else
+ for (i = 1; i < argc; i++) {
+ if (zpool_vdev_offline(zhp, argv[i], istmp) != 0)
ret = 1;
-
- if (!ret) {
- zpool_log_history(g_zfs, argc + optind, argv - optind, poolname,
- B_TRUE, B_FALSE);
}
+
zpool_close(zhp);
return (ret);
@@ -2458,14 +2562,12 @@ zpool_do_clear(int argc, char **argv)
pool = argv[1];
device = argc == 3 ? argv[2] : NULL;
- if ((zhp = zpool_open(g_zfs, pool)) == NULL)
+ if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
return (1);
if (zpool_clear(zhp, device) != 0)
ret = 1;
- if (!ret)
- zpool_log_history(g_zfs, argc, argv, pool, B_TRUE, B_FALSE);
zpool_close(zhp);
return (ret);
@@ -2494,11 +2596,6 @@ scrub_callback(zpool_handle_t *zhp, void *data)
err = zpool_scrub(zhp, cb->cb_type);
- if (!err) {
- zpool_log_history(g_zfs, cb->cb_argc, cb->cb_argv,
- zpool_get_name(zhp), B_TRUE, B_FALSE);
- }
-
return (err != 0);
}
@@ -2559,7 +2656,7 @@ print_scrub_status(nvlist_t *nvroot)
uint_t vsc;
time_t start, end, now;
double fraction_done;
- uint64_t examined, total, minutes_left;
+ uint64_t examined, total, minutes_left, minutes_taken;
char *scrub_type;
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
@@ -2583,8 +2680,13 @@ print_scrub_status(nvlist_t *nvroot)
total = vs->vs_alloc;
if (end != 0) {
- (void) printf(gettext("%s %s with %llu errors on %s"),
+ minutes_taken = (uint64_t)((end - start) / 60);
+
+ (void) printf(gettext("%s %s after %lluh%um with %llu errors "
+ "on %s"),
scrub_type, vs->vs_scrub_complete ? "completed" : "stopped",
+ (u_longlong_t)(minutes_taken / 60),
+ (uint_t)(minutes_taken % 60),
(u_longlong_t)vs->vs_scrub_errors, ctime(&end));
return;
}
@@ -2597,9 +2699,12 @@ print_scrub_status(nvlist_t *nvroot)
fraction_done = (double)examined / total;
minutes_left = (uint64_t)((now - start) *
(1 - fraction_done) / fraction_done / 60);
+ minutes_taken = (uint64_t)((now - start) / 60);
- (void) printf(gettext("%s in progress, %.2f%% done, %lluh%um to go\n"),
- scrub_type, 100 * fraction_done,
+ (void) printf(gettext("%s in progress for %lluh%um, %.2f%% done, "
+ "%lluh%um to go\n"),
+ scrub_type, (u_longlong_t)(minutes_taken / 60),
+ (uint_t)(minutes_taken % 60), 100 * fraction_done,
(u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
}
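/*
 * Worked example of the progress arithmetic above, as a hypothetical
 * standalone helper.  With 5400 seconds (90 minutes) elapsed and
 * fraction_done = 0.25, minutes_taken is 90 (printed as 1h30m) and
 * minutes_left is 5400 * 0.75 / 0.25 / 60 = 270 (printed as 4h30m).
 */
static void
print_scrub_progress_sketch(uint64_t elapsed_seconds, double fraction_done)
{
	uint64_t minutes_taken = elapsed_seconds / 60;
	uint64_t minutes_left = (uint64_t)(elapsed_seconds *
	    (1 - fraction_done) / fraction_done / 60);

	(void) printf("in progress for %lluh%um, %lluh%um to go\n",
	    (u_longlong_t)(minutes_taken / 60), (uint_t)(minutes_taken % 60),
	    (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
}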
@@ -2653,7 +2758,7 @@ find_spare(zpool_handle_t *zhp, void *data)
*/
void
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
- int namewidth, int depth, boolean_t isspare)
+ int namewidth, int depth, boolean_t isspare, boolean_t print_logs)
{
nvlist_t **child;
uint_t c, children;
@@ -2662,7 +2767,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
char *vname;
uint64_t notpresent;
spare_cbdata_t cb;
- const char *state;
+ char *state;
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &c) == 0);
@@ -2671,7 +2776,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
&child, &children) != 0)
children = 0;
- state = state_to_name(vs);
+ state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
if (isspare) {
/*
* For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
@@ -2736,6 +2841,18 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
}
break;
+ case VDEV_AUX_ERR_EXCEEDED:
+ (void) printf(gettext("too many errors"));
+ break;
+
+ case VDEV_AUX_IO_FAILURE:
+ (void) printf(gettext("experienced I/O failures"));
+ break;
+
+ case VDEV_AUX_BAD_LOG:
+ (void) printf(gettext("bad intent log"));
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
@@ -2753,9 +2870,15 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf("\n");
for (c = 0; c < children; c++) {
+ uint64_t is_log = B_FALSE;
+
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+ if ((is_log && !print_logs) || (!is_log && print_logs))
+ continue;
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
print_status_config(zhp, vname, child[c],
- namewidth, depth + 2, isspare);
+ namewidth, depth + 2, isspare, B_FALSE);
free(vname);
}
}
@@ -2763,7 +2886,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
static void
print_error_log(zpool_handle_t *zhp)
{
- nvlist_t *nverrlist;
+ nvlist_t *nverrlist = NULL;
nvpair_t *elem;
char *pathname;
size_t len = MAXPATHLEN * 2;
@@ -2810,7 +2933,27 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
for (i = 0; i < nspares; i++) {
name = zpool_vdev_name(g_zfs, zhp, spares[i]);
print_status_config(zhp, name, spares[i],
- namewidth, 2, B_TRUE);
+ namewidth, 2, B_TRUE, B_FALSE);
+ free(name);
+ }
+}
+
+static void
+print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
+ int namewidth)
+{
+ uint_t i;
+ char *name;
+
+ if (nl2cache == 0)
+ return;
+
+ (void) printf(gettext("\tcache\n"));
+
+ for (i = 0; i < nl2cache; i++) {
+ name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
+ print_status_config(zhp, name, l2cache[i],
+ namewidth, 2, B_FALSE, B_FALSE);
free(name);
}
}
@@ -2869,7 +3012,7 @@ status_callback(zpool_handle_t *zhp, void *data)
&nvroot) == 0);
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
(uint64_t **)&vs, &c) == 0);
- health = state_to_name(vs);
+ health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
(void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp));
(void) printf(gettext(" state: %s\n"), health);
@@ -2972,6 +3115,45 @@ status_callback(zpool_handle_t *zhp, void *data)
"backup.\n"));
break;
+ case ZPOOL_STATUS_FAULTED_DEV_R:
+ (void) printf(gettext("status: One or more devices are "
+ "faulted in response to persistent errors.\n\tSufficient "
+ "replicas exist for the pool to continue functioning "
+ "in a\n\tdegraded state.\n"));
+ (void) printf(gettext("action: Replace the faulted device, "
+ "or use 'zpool clear' to mark the device\n\trepaired.\n"));
+ break;
+
+ case ZPOOL_STATUS_FAULTED_DEV_NR:
+ (void) printf(gettext("status: One or more devices are "
+ "faulted in response to persistent errors. There are "
+ "insufficient replicas for the pool to\n\tcontinue "
+ "functioning.\n"));
+ (void) printf(gettext("action: Destroy and re-create the pool "
+ "from a backup source. Manually marking the device\n"
+ "\trepaired using 'zpool clear' may allow some data "
+ "to be recovered.\n"));
+ break;
+
+ case ZPOOL_STATUS_IO_FAILURE_WAIT:
+ case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
+ (void) printf(gettext("status: One or more devices are "
+ "faulted in response to IO failures.\n"));
+ (void) printf(gettext("action: Make sure the affected devices "
+ "are connected, then run 'zpool clear'.\n"));
+ break;
+
+ case ZPOOL_STATUS_BAD_LOG:
+ (void) printf(gettext("status: An intent log record "
+ "could not be read.\n"
+ "\tWaiting for adminstrator intervention to fix the "
+ "faulted pool.\n"));
+ (void) printf(gettext("action: Either restore the affected "
+ "device(s) and run 'zpool online',\n"
+ "\tor ignore the intent log records by running "
+ "'zpool clear'.\n"));
+ break;
+
default:
/*
* The remaining errors can't actually be generated, yet.
@@ -2986,8 +3168,8 @@ status_callback(zpool_handle_t *zhp, void *data)
if (config != NULL) {
int namewidth;
uint64_t nerr;
- nvlist_t **spares;
- uint_t nspares;
+ nvlist_t **spares, **l2cache;
+ uint_t nspares, nl2cache;
(void) printf(gettext(" scrub: "));
@@ -3001,7 +3183,14 @@ status_callback(zpool_handle_t *zhp, void *data)
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth,
"NAME", "STATE", "READ", "WRITE", "CKSUM");
print_status_config(zhp, zpool_get_name(zhp), nvroot,
- namewidth, 0, B_FALSE);
+ namewidth, 0, B_FALSE, B_FALSE);
+ if (num_logs(nvroot) > 0)
+ print_status_config(zhp, "logs", nvroot, namewidth, 0,
+ B_FALSE, B_TRUE);
+
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+ &l2cache, &nl2cache) == 0)
+ print_l2cache(zhp, l2cache, nl2cache, namewidth);
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&spares, &nspares) == 0)
@@ -3016,7 +3205,7 @@ status_callback(zpool_handle_t *zhp, void *data)
* precise count by fetching the entire log and
* uniquifying the results.
*/
- if (nerr < 100 && !cbp->cb_verbose &&
+ if (nerr > 0 && nerr < 100 && !cbp->cb_verbose &&
zpool_get_errlog(zhp, &nverrlist) == 0) {
nvpair_t *elem;
@@ -3103,6 +3292,7 @@ typedef struct upgrade_cbdata {
int cb_first;
int cb_newer;
int cb_argc;
+ uint64_t cb_version;
char **cb_argv;
} upgrade_cbdata_t;
@@ -3118,7 +3308,7 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
&version) == 0);
- if (!cbp->cb_newer && version < ZFS_VERSION) {
+ if (!cbp->cb_newer && version < SPA_VERSION) {
if (!cbp->cb_all) {
if (cbp->cb_first) {
(void) printf(gettext("The following pools are "
@@ -3135,16 +3325,13 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
zpool_get_name(zhp));
} else {
cbp->cb_first = B_FALSE;
- ret = zpool_upgrade(zhp);
+ ret = zpool_upgrade(zhp, cbp->cb_version);
if (!ret) {
- zpool_log_history(g_zfs, cbp->cb_argc,
- cbp->cb_argv, zpool_get_name(zhp), B_TRUE,
- B_FALSE);
(void) printf(gettext("Successfully upgraded "
- "'%s'\n"), zpool_get_name(zhp));
+ "'%s'\n\n"), zpool_get_name(zhp));
}
}
- } else if (cbp->cb_newer && version > ZFS_VERSION) {
+ } else if (cbp->cb_newer && version > SPA_VERSION) {
assert(!cbp->cb_all);
if (cbp->cb_first) {
@@ -3168,29 +3355,37 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
static int
upgrade_one(zpool_handle_t *zhp, void *data)
{
- nvlist_t *config;
- uint64_t version;
- int ret;
upgrade_cbdata_t *cbp = data;
+ uint64_t cur_version;
+ int ret;
- config = zpool_get_config(zhp, NULL);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
- &version) == 0);
+ if (strcmp("log", zpool_get_name(zhp)) == 0) {
+ (void) printf(gettext("'log' is now a reserved word\n"
+ "Pool 'log' must be renamed using export and import"
+ " to upgrade.\n"));
+ return (1);
+ }
- if (version == ZFS_VERSION) {
+ cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+ if (cur_version > cbp->cb_version) {
+ (void) printf(gettext("Pool '%s' is already formatted "
+ "using more current version '%llu'.\n"),
+ zpool_get_name(zhp), cur_version);
+ return (0);
+ }
+ if (cur_version == cbp->cb_version) {
(void) printf(gettext("Pool '%s' is already formatted "
"using the current version.\n"), zpool_get_name(zhp));
return (0);
}
- ret = zpool_upgrade(zhp);
+ ret = zpool_upgrade(zhp, cbp->cb_version);
if (!ret) {
- zpool_log_history(g_zfs, cbp->cb_argc, cbp->cb_argv,
- zpool_get_name(zhp), B_TRUE, B_FALSE);
(void) printf(gettext("Successfully upgraded '%s' "
- "from version %llu to version %llu\n"), zpool_get_name(zhp),
- (u_longlong_t)version, (u_longlong_t)ZFS_VERSION);
+ "from version %llu to version %llu\n\n"),
+ zpool_get_name(zhp), (u_longlong_t)cur_version,
+ (u_longlong_t)cbp->cb_version);
}
return (ret != 0);
@@ -3199,7 +3394,7 @@ upgrade_one(zpool_handle_t *zhp, void *data)
/*
* zpool upgrade
* zpool upgrade -v
- * zpool upgrade <-a | pool>
+ * zpool upgrade [-V version] <-a | pool ...>
*
 * With no arguments, display downrev'd ZFS pools available for upgrade.
* Individual pools can be upgraded by specifying the pool, and '-a' will
@@ -3212,9 +3407,11 @@ zpool_do_upgrade(int argc, char **argv)
upgrade_cbdata_t cb = { 0 };
int ret = 0;
boolean_t showversions = B_FALSE;
+ char *end;
+
/* check options */
- while ((c = getopt(argc, argv, "av")) != -1) {
+ while ((c = getopt(argc, argv, "avV:")) != -1) {
switch (c) {
case 'a':
cb.cb_all = B_TRUE;
@@ -3222,6 +3419,15 @@ zpool_do_upgrade(int argc, char **argv)
case 'v':
showversions = B_TRUE;
break;
+ case 'V':
+ cb.cb_version = strtoll(optarg, &end, 10);
+ if (*end != '\0' || cb.cb_version > SPA_VERSION ||
+ cb.cb_version < SPA_VERSION_1) {
+ (void) fprintf(stderr,
+ gettext("invalid version '%s'\n"), optarg);
+ usage(B_FALSE);
+ }
+ break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@@ -3234,6 +3440,14 @@ zpool_do_upgrade(int argc, char **argv)
argc -= optind;
argv += optind;
+ if (cb.cb_version == 0) {
+ cb.cb_version = SPA_VERSION;
+ } else if (!cb.cb_all && argc == 0) {
+ (void) fprintf(stderr, gettext("-V option is "
+ "incompatible with other arguments\n"));
+ usage(B_FALSE);
+ }
+
if (showversions) {
if (cb.cb_all || argc != 0) {
(void) fprintf(stderr, gettext("-v option is "
@@ -3242,14 +3456,14 @@ zpool_do_upgrade(int argc, char **argv)
}
} else if (cb.cb_all) {
if (argc != 0) {
- (void) fprintf(stderr, gettext("-a option is "
- "incompatible with other arguments\n"));
+ (void) fprintf(stderr, gettext("-a option should not "
+ "be used along with a pool name\n"));
usage(B_FALSE);
}
}
- (void) printf(gettext("This system is currently running ZFS version "
- "%llu.\n\n"), ZFS_VERSION);
+ (void) printf(gettext("This system is currently running "
+ "ZFS pool version %llu.\n\n"), SPA_VERSION);
cb.cb_first = B_TRUE;
if (showversions) {
(void) printf(gettext("The following versions are "
@@ -3265,8 +3479,16 @@ zpool_do_upgrade(int argc, char **argv)
(void) printf(gettext(" 4 zpool history\n"));
(void) printf(gettext(" 5 Compression using the gzip "
"algorithm\n"));
- (void) printf(gettext(" 6 bootfs pool property "));
- (void) printf(gettext("\nFor more information on a particular "
+ (void) printf(gettext(" 6 bootfs pool property\n"));
+ (void) printf(gettext(" 7 Separate intent log devices\n"));
+ (void) printf(gettext(" 8 Delegated administration\n"));
+ (void) printf(gettext(" 9 refquota and refreservation "
+ "properties\n"));
+ (void) printf(gettext(" 10 Cache devices\n"));
+ (void) printf(gettext(" 11 Improved scrub performance\n"));
+ (void) printf(gettext(" 12 Snapshot properties\n"));
+ (void) printf(gettext(" 13 snapused property\n"));
+ (void) printf(gettext("For more information on a particular "
"version, including supported releases, see:\n\n"));
(void) printf("http://www.opensolaris.org/os/community/zfs/"
"version/N\n\n");
@@ -3306,6 +3528,53 @@ zpool_do_upgrade(int argc, char **argv)
return (ret);
}
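/*
 * Sketch of the '-V' validation used in zpool_do_upgrade() above, pulled
 * out as a hypothetical helper for illustration: strtoll() with an end
 * pointer rejects trailing garbage, and the value must fall within
 * [SPA_VERSION_1, SPA_VERSION].
 */
static int
parse_pool_version(const char *arg, uint64_t *versionp)
{
	char *end;
	uint64_t version = strtoll(arg, &end, 10);

	if (*end != '\0' || version < SPA_VERSION_1 || version > SPA_VERSION)
		return (-1);		/* not a valid on-disk pool version */
	*versionp = version;
	return (0);
}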
+typedef struct hist_cbdata {
+ boolean_t first;
+ int longfmt;
+ int internal;
+} hist_cbdata_t;
+
+char *hist_event_table[LOG_END] = {
+ "invalid event",
+ "pool create",
+ "vdev add",
+ "pool remove",
+ "pool destroy",
+ "pool export",
+ "pool import",
+ "vdev attach",
+ "vdev replace",
+ "vdev detach",
+ "vdev online",
+ "vdev offline",
+ "vdev upgrade",
+ "pool clear",
+ "pool scrub",
+ "pool property set",
+ "create",
+ "clone",
+ "destroy",
+ "destroy_begin_sync",
+ "inherit",
+ "property set",
+ "quota set",
+ "permission update",
+ "permission remove",
+ "permission who remove",
+ "promote",
+ "receive",
+ "rename",
+ "reservation set",
+ "replay_inc_sync",
+ "replay_full_sync",
+ "rollback",
+ "snapshot",
+ "filesystem version upgrade",
+ "refquota set",
+ "refreservation set",
+ "pool scrub done",
+};
+
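/*
 * Minimal sketch of how a history record maps onto the table above
 * (hypothetical helper; the example command string is made up).  A record
 * carrying ZPOOL_HIST_CMD is a user command; otherwise it is an internal
 * event whose index selects an entry in hist_event_table.
 */
static const char *
history_record_kind(nvlist_t *rec)
{
	char *cmdstr;
	uint64_t ievent;

	if (nvlist_lookup_string(rec, ZPOOL_HIST_CMD, &cmdstr) == 0)
		return ("command");	/* e.g. "zpool create tank mirror da0 da1" */
	if (nvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT, &ievent) == 0 &&
	    ievent < LOG_END)
		return (hist_event_table[ievent]);	/* e.g. "pool scrub" */
	return ("unknown");
}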
/*
* Print out the command history for a specific pool.
*/
@@ -3316,13 +3585,22 @@ get_history_one(zpool_handle_t *zhp, void *data)
nvlist_t **records;
uint_t numrecords;
char *cmdstr;
+ char *pathstr;
uint64_t dst_time;
time_t tsec;
struct tm t;
char tbuf[30];
int ret, i;
+ uint64_t who;
+ struct passwd *pwd;
+ char *hostname;
+ char *zonename;
+ char internalstr[MAXPATHLEN];
+ hist_cbdata_t *cb = (hist_cbdata_t *)data;
+ uint64_t txg;
+ uint64_t ievent;
- *(boolean_t *)data = B_FALSE;
+ cb->first = B_FALSE;
(void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp));
@@ -3333,14 +3611,65 @@ get_history_one(zpool_handle_t *zhp, void *data)
&records, &numrecords) == 0);
for (i = 0; i < numrecords; i++) {
if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
- &dst_time) == 0) {
- verify(nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
- &cmdstr) == 0);
- tsec = dst_time;
- (void) localtime_r(&tsec, &t);
- (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
- (void) printf("%s %s\n", tbuf, cmdstr);
+ &dst_time) != 0)
+ continue;
+
+ /* is it an internal event or a standard event? */
+ if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
+ &cmdstr) != 0) {
+ if (cb->internal == 0)
+ continue;
+
+ if (nvlist_lookup_uint64(records[i],
+ ZPOOL_HIST_INT_EVENT, &ievent) != 0)
+ continue;
+ verify(nvlist_lookup_uint64(records[i],
+ ZPOOL_HIST_TXG, &txg) == 0);
+ verify(nvlist_lookup_string(records[i],
+ ZPOOL_HIST_INT_STR, &pathstr) == 0);
+ if (ievent >= LOG_END)
+ continue;
+ (void) snprintf(internalstr,
+ sizeof (internalstr),
+ "[internal %s txg:%lld] %s",
+ hist_event_table[ievent], txg,
+ pathstr);
+ cmdstr = internalstr;
+ }
+ tsec = dst_time;
+ (void) localtime_r(&tsec, &t);
+ (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+ (void) printf("%s %s", tbuf, cmdstr);
+
+ if (!cb->longfmt) {
+ (void) printf("\n");
+ continue;
}
+ (void) printf(" [");
+ if (nvlist_lookup_uint64(records[i],
+ ZPOOL_HIST_WHO, &who) == 0) {
+ pwd = getpwuid((uid_t)who);
+ if (pwd)
+ (void) printf("user %s on",
+ pwd->pw_name);
+ else
+ (void) printf("user %d on",
+ (int)who);
+ } else {
+ (void) printf(gettext("no info]\n"));
+ continue;
+ }
+ if (nvlist_lookup_string(records[i],
+ ZPOOL_HIST_HOST, &hostname) == 0) {
+ (void) printf(" %s", hostname);
+ }
+ if (nvlist_lookup_string(records[i],
+ ZPOOL_HIST_ZONE, &zonename) == 0) {
+ (void) printf(":%s", zonename);
+ }
+
+ (void) printf("]");
+ (void) printf("\n");
}
(void) printf("\n");
nvlist_free(nvhis);
@@ -3353,19 +3682,38 @@ get_history_one(zpool_handle_t *zhp, void *data)
*
* Displays the history of commands that modified pools.
*/
+
+
int
zpool_do_history(int argc, char **argv)
{
- boolean_t first = B_TRUE;
+ hist_cbdata_t cbdata = { 0 };
int ret;
+ int c;
+ cbdata.first = B_TRUE;
+ /* check options */
+ while ((c = getopt(argc, argv, "li")) != -1) {
+ switch (c) {
+ case 'l':
+ cbdata.longfmt = 1;
+ break;
+ case 'i':
+ cbdata.internal = 1;
+ break;
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
argc -= optind;
argv += optind;
ret = for_each_pool(argc, argv, B_FALSE, NULL, get_history_one,
- &first);
+ &cbdata);
- if (argc == 0 && first == B_TRUE) {
+ if (argc == 0 && cbdata.first == B_TRUE) {
(void) printf(gettext("no pools available\n"));
return (0);
}
@@ -3376,17 +3724,18 @@ zpool_do_history(int argc, char **argv)
static int
get_callback(zpool_handle_t *zhp, void *data)
{
- libzfs_get_cbdata_t *cbp = (libzfs_get_cbdata_t *)data;
+ zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data;
char value[MAXNAMELEN];
- zfs_source_t srctype;
- zpool_proplist_t *pl;
+ zprop_source_t srctype;
+ zprop_list_t *pl;
for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
/*
- * Skip the special fake placeholder.
+ * Skip the special fake placeholder. This will also skip
+ * over the name property when 'all' is specified.
*/
- if (pl->pl_prop == ZFS_PROP_NAME &&
+ if (pl->pl_prop == ZPOOL_PROP_NAME &&
pl == cbp->cb_proplist)
continue;
@@ -3394,7 +3743,7 @@ get_callback(zpool_handle_t *zhp, void *data)
value, sizeof (value), &srctype) != 0)
continue;
- libzfs_print_one_property(zpool_get_name(zhp), cbp,
+ zprop_print_one_property(zpool_get_name(zhp), cbp,
zpool_prop_to_name(pl->pl_prop), value, srctype, NULL);
}
return (0);
@@ -3403,25 +3752,27 @@ get_callback(zpool_handle_t *zhp, void *data)
int
zpool_do_get(int argc, char **argv)
{
- libzfs_get_cbdata_t cb = { 0 };
- zpool_proplist_t fake_name = { 0 };
+ zprop_get_cbdata_t cb = { 0 };
+ zprop_list_t fake_name = { 0 };
int ret;
if (argc < 3)
usage(B_FALSE);
cb.cb_first = B_TRUE;
- cb.cb_sources = ZFS_SRC_ALL;
+ cb.cb_sources = ZPROP_SRC_ALL;
cb.cb_columns[0] = GET_COL_NAME;
cb.cb_columns[1] = GET_COL_PROPERTY;
cb.cb_columns[2] = GET_COL_VALUE;
cb.cb_columns[3] = GET_COL_SOURCE;
+ cb.cb_type = ZFS_TYPE_POOL;
- if (zpool_get_proplist(g_zfs, argv[1], &cb.cb_proplist) != 0)
+ if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
+ ZFS_TYPE_POOL) != 0)
usage(B_FALSE);
if (cb.cb_proplist != NULL) {
- fake_name.pl_prop = ZFS_PROP_NAME;
+ fake_name.pl_prop = ZPOOL_PROP_NAME;
fake_name.pl_width = strlen(gettext("NAME"));
fake_name.pl_next = cb.cb_proplist;
cb.cb_proplist = &fake_name;
@@ -3431,9 +3782,9 @@ zpool_do_get(int argc, char **argv)
get_callback, &cb);
if (cb.cb_proplist == &fake_name)
- zfs_free_proplist(fake_name.pl_next);
+ zprop_free_list(fake_name.pl_next);
else
- zfs_free_proplist(cb.cb_proplist);
+ zprop_free_list(cb.cb_proplist);
return (ret);
}
@@ -3500,11 +3851,6 @@ zpool_do_set(int argc, char **argv)
error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL,
set_callback, &cb);
- if (cb.cb_any_successful) {
- *(cb.cb_value - 1) = '=';
- zpool_log_history(g_zfs, argc, argv, argv[2], B_FALSE, B_FALSE);
- }
-
return (error);
}
@@ -3531,7 +3877,6 @@ main(int argc, char **argv)
int ret;
int i;
char *cmdname;
- int found = 0;
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
@@ -3562,26 +3907,29 @@ main(int argc, char **argv)
if (strcmp(cmdname, "-?") == 0)
usage(B_TRUE);
+ zpool_set_history_str("zpool", argc, argv, history_str);
+ verify(zpool_stage_history(g_zfs, history_str) == 0);
+
/*
* Run the appropriate command.
*/
if (find_command_idx(cmdname, &i) == 0) {
current_command = &command_table[i];
ret = command_table[i].func(argc - 1, argv + 1);
- found++;
- }
-
- /*
- * 'freeze' is a vile debugging abomination, so we treat it as such.
- */
- if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
+ } else if (strchr(cmdname, '=')) {
+ verify(find_command_idx("set", &i) == 0);
+ current_command = &command_table[i];
+ ret = command_table[i].func(argc, argv);
+ } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
+ /*
+ * 'freeze' is a vile debugging abomination, so we treat
+ * it as such.
+ */
char buf[16384];
int fd = open(ZFS_DEV, O_RDWR);
(void) strcpy((void *)buf, argv[2]);
return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf));
- }
-
- if (!found) {
+ } else {
(void) fprintf(stderr, gettext("unrecognized "
"command '%s'\n"), cmdname);
usage(B_FALSE);
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
index 8eb9c81..f44da4f 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -77,3 +77,28 @@ zpool_no_memory(void)
gettext("internal error: out of memory\n"));
exit(1);
}
+
+/*
+ * Return the number of logs in the supplied nvlist
+ */
+uint_t
+num_logs(nvlist_t *nv)
+{
+ uint_t nlogs = 0;
+ uint_t c, children;
+ nvlist_t **child;
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0)
+ return (0);
+
+ for (c = 0; c < children; c++) {
+ uint64_t is_log = B_FALSE;
+
+ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+ if (is_log)
+ nlogs++;
+ }
+ return (nlogs);
+}
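/*
 * Purely illustrative usage of num_logs(): build a tiny config with one
 * log child and one regular child and count the logs.  The helper name is
 * hypothetical; the nvlist keys are the ZPOOL_CONFIG_* names used above.
 */
static uint_t
num_logs_example(void)
{
	nvlist_t *root, *child[2];
	uint_t nlogs;

	verify(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_alloc(&child[0], NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_alloc(&child[1], NV_UNIQUE_NAME, 0) == 0);
	verify(nvlist_add_uint64(child[0], ZPOOL_CONFIG_IS_LOG, 1ULL) == 0);
	verify(nvlist_add_uint64(child[1], ZPOOL_CONFIG_IS_LOG, 0ULL) == 0);
	verify(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
	    child, 2) == 0);

	nlogs = num_logs(root);		/* 1: only child[0] is a log vdev */
	nvlist_free(child[0]);
	nvlist_free(child[1]);
	nvlist_free(root);
	return (nlogs);
}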
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
index cb05bda..e82f320 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef ZPOOL_UTIL_H
#define ZPOOL_UTIL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <libnvpair.h>
#include <libzfs.h>
@@ -41,22 +39,24 @@ extern "C" {
void *safe_malloc(size_t);
char *safe_strdup(const char *);
void zpool_no_memory(void);
+uint_t num_logs(nvlist_t *nv);
/*
* Virtual device functions
*/
-nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- boolean_t isreplace, int argc, char **argv);
+
+nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
+ boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
/*
* Pool list functions
*/
-int for_each_pool(int, char **, boolean_t unavail, zpool_proplist_t **,
+int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **,
zpool_iter_f, void *);
typedef struct zpool_list zpool_list_t;
-zpool_list_t *pool_list_get(int, char **, zpool_proplist_t **, int *);
+zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *);
void pool_list_update(zpool_list_t *);
int pool_list_iter(zpool_list_t *, int unavail, zpool_iter_f, void *);
void pool_list_free(zpool_list_t *);
diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
index cfed1f0..35a636c 100644
--- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
+++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
@@ -20,12 +20,10 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Functions to convert between a list of vdevs and an nvlist representing the
* configuration. Each entry in the list can be one of:
@@ -48,8 +46,8 @@
* Hot spares are a special case, and passed down as an array of disk vdevs, at
* the same level as the root of the vdev tree.
*
- * The only function exported by this file is 'get_vdev_spec'. The function
- * performs several passes:
+ * The only function exported by this file is 'make_root_vdev'. The
+ * function performs several passes:
*
* 1. Construct the vdev specification. Performs syntax validation and
* makes sure each device is valid.
@@ -59,6 +57,7 @@
* 3. Check for replication errors if the 'force' flag is not specified.
 * This validates that the replication level is consistent across the
* entire pool.
+ * 4. Call libzfs to label any whole disks with an EFI label.
*/
#include <assert.h>
@@ -76,8 +75,6 @@
#include <sys/mntent.h>
#include <libgeom.h>
-#include <libzfs.h>
-
#include "zpool_util.h"
/*
@@ -111,53 +108,105 @@ vdev_error(const char *fmt, ...)
}
/*
- * Validate a GEOM provider.
+ * Check that a file is valid. All we can do in this case is check that it's
+ * not in use by another pool, and not in use by swap.
*/
static int
-check_provider(const char *name, boolean_t force, boolean_t isspare)
+check_file(const char *file, boolean_t force, boolean_t isspare)
{
- struct gmesh mesh;
- struct gclass *mp;
- struct ggeom *gp;
- struct gprovider *pp;
- int rv;
-
- /* XXX: What to do with isspare? */
-
- if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- name += sizeof(_PATH_DEV) - 1;
-
- rv = geom_gettree(&mesh);
- assert(rv == 0);
-
- pp = NULL;
- LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
- LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
- LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
- if (strcmp(pp->lg_name, name) == 0)
- goto out;
+ char *name;
+ int fd;
+ int ret = 0;
+ int err;
+ pool_state_t state;
+ boolean_t inuse;
+
+#if 0
+ if (dm_inuse_swap(file, &err)) {
+ if (err)
+ libdiskmgt_error(err);
+ else
+ vdev_error(gettext("%s is currently used by swap. "
+ "Please see swap(1M).\n"), file);
+ return (-1);
+ }
+#endif
+
+ if ((fd = open(file, O_RDONLY)) < 0)
+ return (0);
+
+ if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
+ const char *desc;
+
+ switch (state) {
+ case POOL_STATE_ACTIVE:
+ desc = gettext("active");
+ break;
+
+ case POOL_STATE_EXPORTED:
+ desc = gettext("exported");
+ break;
+
+ case POOL_STATE_POTENTIALLY_ACTIVE:
+ desc = gettext("potentially active");
+ break;
+
+ default:
+ desc = gettext("unknown");
+ break;
+ }
+
+ /*
+ * Allow hot spares to be shared between pools.
+ */
+ if (state == POOL_STATE_SPARE && isspare)
+ return (0);
+
+ if (state == POOL_STATE_ACTIVE ||
+ state == POOL_STATE_SPARE || !force) {
+ switch (state) {
+ case POOL_STATE_SPARE:
+ vdev_error(gettext("%s is reserved as a hot "
+ "spare for pool %s\n"), file, name);
+ break;
+ default:
+ vdev_error(gettext("%s is part of %s pool "
+ "'%s'\n"), file, desc, name);
+ break;
}
+ ret = -1;
}
+
+ free(name);
}
-out:
- rv = -1;
- if (pp == NULL)
- vdev_error("no such provider %s\n", name);
- else {
- int acr, acw, ace;
-
- VERIFY(sscanf(pp->lg_mode, "r%dw%de%d", &acr, &acw, &ace) == 3);
- if (acw == 0 && ace == 0)
- rv = 0;
- else
- vdev_error("%s is in use (%s)\n", name, pp->lg_mode);
- }
- geom_deletetree(&mesh);
- return (rv);
+
+ (void) close(fd);
+ return (ret);
+}
+
+static int
+check_provider(const char *name, boolean_t force, boolean_t isspare)
+{
+ char path[MAXPATHLEN];
+
+ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0)
+ snprintf(path, sizeof(path), "%s%s", _PATH_DEV, name);
+ else
+ strlcpy(path, name, sizeof(path));
+
+ return (check_file(path, force, isspare));
}
+/*
+ * By "whole disk" we mean an entire physical disk (something we can
+ * label, toggle the write cache on, etc.) as opposed to the full
+ * capacity of a pseudo-device such as lofi or did. We act as if we
+ * are labeling the disk, which should be a pretty good test of whether
+ * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
+ * it isn't.
+ */
static boolean_t
-is_provider(const char *name)
+is_whole_disk(const char *name)
{
int fd;
@@ -167,8 +216,8 @@ is_provider(const char *name)
return (B_TRUE);
}
return (B_FALSE);
-
}
+
/*
* Create a leaf vdev. Determine if this is a GEOM provider.
* Valid forms for a leaf vdev are:
@@ -176,25 +225,81 @@ is_provider(const char *name)
* /dev/xxx Complete path to a GEOM provider
* xxx Shorthand for /dev/xxx
*/
-nvlist_t *
-make_leaf_vdev(const char *arg)
+static nvlist_t *
+make_leaf_vdev(const char *arg, uint64_t is_log)
{
- char ident[DISK_IDENT_SIZE], path[MAXPATHLEN];
+ char path[MAXPATHLEN];
struct stat64 statbuf;
nvlist_t *vdev = NULL;
char *type = NULL;
boolean_t wholedisk = B_FALSE;
- if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
- strlcpy(path, arg, sizeof (path));
- else
- snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
+ /*
+ * Determine what type of vdev this is, and put the full path into
+ * 'path'. We detect whether this is a device of file afterwards by
+ * checking the st_mode of the file.
+ */
+ if (arg[0] == '/') {
+ /*
+ * Complete device or file path. Exact type is determined by
+ * examining the file descriptor afterwards.
+ */
+ wholedisk = is_whole_disk(arg);
+ if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
+ (void) fprintf(stderr,
+ gettext("cannot open '%s': %s\n"),
+ arg, strerror(errno));
+ return (NULL);
+ }
+
+ (void) strlcpy(path, arg, sizeof (path));
+ } else {
+ /*
+ * This may be a short path for a device, or it could be total
+ * gibberish. Check to see if it's a known device in
+ * /dev/dsk/. As part of this check, see if we've been given a
+ * an entire disk (minus the slice number).
+ */
+ if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
+ strlcpy(path, arg, sizeof (path));
+ else
+ snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
+ wholedisk = is_whole_disk(path);
+ if (!wholedisk && (stat64(path, &statbuf) != 0)) {
+ /*
+ * If we got ENOENT, then the user gave us
+ * gibberish, so try to direct them with a
+ * reasonable error message. Otherwise,
+ * regurgitate strerror() since it's the best we
+ * can do.
+ */
+ if (errno == ENOENT) {
+ (void) fprintf(stderr,
+ gettext("cannot open '%s': no such "
+ "GEOM provider\n"), arg);
+ (void) fprintf(stderr,
+ gettext("must be a full path or "
+ "shorthand device name\n"));
+ return (NULL);
+ } else {
+ (void) fprintf(stderr,
+ gettext("cannot open '%s': %s\n"),
+ path, strerror(errno));
+ return (NULL);
+ }
+ }
+ }
- if (is_provider(path))
+ /*
+ * Determine whether this is a device or a file.
+ */
+ if (wholedisk) {
type = VDEV_TYPE_DISK;
- else {
+ } else if (S_ISREG(statbuf.st_mode)) {
+ type = VDEV_TYPE_FILE;
+ } else {
(void) fprintf(stderr, gettext("cannot use '%s': must be a "
- "GEOM provider\n"), path);
+ "GEOM provider or regular file\n"), path);
return (NULL);
}
@@ -206,6 +311,7 @@ make_leaf_vdev(const char *arg)
verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
+ verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)B_FALSE) == 0);
@@ -267,12 +373,14 @@ typedef struct replication_level {
uint64_t zprl_parity;
} replication_level_t;
+#define ZPOOL_FUZZ (16 * 1024 * 1024)
+
/*
* Given a list of toplevel vdevs, return the current replication level. If
* the config is inconsistent, then NULL is returned. If 'fatal' is set, then
* an error message will be displayed for each self-inconsistent vdev.
*/
-replication_level_t *
+static replication_level_t *
get_replication(nvlist_t *nvroot, boolean_t fatal)
{
nvlist_t **top;
@@ -291,10 +399,20 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
lastrep.zprl_type = NULL;
for (t = 0; t < toplevels; t++) {
+ uint64_t is_log = B_FALSE;
+
nv = top[t];
- verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+ /*
+ * For separate logs we ignore the top level vdev replication
+ * constraints.
+ */
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
+ if (is_log)
+ continue;
+ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
+ &type) == 0);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
/*
@@ -328,7 +446,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
}
/*
- * The 'dontreport' variable indicatest that we've
+ * The 'dontreport' variable indicates that we've
* already reported an error for this spec, so don't
* bother doing it again.
*/
@@ -349,7 +467,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
ZPOOL_CONFIG_TYPE, &childtype) == 0);
/*
- * If this is a a replacing or spare vdev, then
+ * If this is a replacing or spare vdev, then
* get the real first child of the vdev.
*/
if (strcmp(childtype,
@@ -409,22 +527,30 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
*/
if ((fd = open(path, O_RDONLY)) >= 0) {
err = fstat64(fd, &statbuf);
+ if (err == 0 &&
+ S_ISCHR(statbuf.st_mode)) {
+ err = ioctl(fd, DIOCGMEDIASIZE,
+ &statbuf.st_size);
+ }
(void) close(fd);
} else {
err = stat64(path, &statbuf);
}
-
if (err != 0 || statbuf.st_size == 0)
continue;
size = statbuf.st_size;
/*
- * Also check the size of each device. If they
- * differ, then report an error.
+ * Also make sure that devices and
+ * slices have a consistent size. If
+ * they differ by a significant amount
+ * (~16MB) then report an error.
*/
- if (!dontreport && vdev_size != -1ULL &&
- size != vdev_size) {
+ if (!dontreport &&
+ (vdev_size != -1ULL &&
+ (labs(size - vdev_size) >
+ ZPOOL_FUZZ))) {
if (ret != NULL)
free(ret);
ret = NULL;
@@ -506,9 +632,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
* has a consistent replication level, then we ignore any errors. Otherwise,
* report any difference between the two.
*/
-int
+static int
check_replication(nvlist_t *config, nvlist_t *newroot)
{
+ nvlist_t **child;
+ uint_t children;
replication_level_t *current = NULL, *new;
int ret;
@@ -524,6 +652,23 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
if ((current = get_replication(nvroot, B_FALSE)) == NULL)
return (0);
}
+ /*
+ * for spares there may be no children, and therefore no
+ * replication level to check
+ */
+ if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0) || (children == 0)) {
+ free(current);
+ return (0);
+ }
+
+ /*
+ * If all we have is logs then there's no replication level to check.
+ */
+ if (num_logs(newroot) == children) {
+ free(current);
+ return (0);
+ }
/*
* Get the replication level of the new vdev spec, reporting any
@@ -621,7 +766,7 @@ is_spare(nvlist_t *config, const char *path)
* Go through and find any devices that are in use. We rely on libdiskmgt for
* the majority of this task.
*/
-int
+static int
check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
int isspare)
{
@@ -653,6 +798,9 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
if (strcmp(type, VDEV_TYPE_DISK) == 0)
ret = check_provider(path, force, isspare);
+ if (strcmp(type, VDEV_TYPE_FILE) == 0)
+ ret = check_file(path, force, isspare);
+
return (ret);
}
@@ -668,10 +816,17 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
isreplacing, B_TRUE)) != 0)
return (ret);
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0)
+ for (c = 0; c < children; c++)
+ if ((ret = check_in_use(config, child[c], force,
+ isreplacing, B_FALSE)) != 0)
+ return (ret);
+
return (0);
}
-const char *
+static const char *
is_grouping(const char *type, int *mindev)
{
if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
@@ -698,6 +853,18 @@ is_grouping(const char *type, int *mindev)
return (VDEV_TYPE_SPARE);
}
+ if (strcmp(type, "log") == 0) {
+ if (mindev != NULL)
+ *mindev = 1;
+ return (VDEV_TYPE_LOG);
+ }
+
+ if (strcmp(type, "cache") == 0) {
+ if (mindev != NULL)
+ *mindev = 1;
+ return (VDEV_TYPE_L2CACHE);
+ }
+
return (NULL);
}
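/*
 * Illustrative check of the mapping above (hypothetical helper; "da0" is
 * just an example leaf name): the new "log" and "cache" keywords each
 * accept a single device, and anything that is not a grouping keyword is
 * treated as a leaf vdev name.
 */
static void
is_grouping_example(void)
{
	int mindev;

	assert(strcmp(is_grouping("log", &mindev), VDEV_TYPE_LOG) == 0);
	assert(mindev == 1);	/* a log needs at least one device */
	assert(strcmp(is_grouping("cache", &mindev), VDEV_TYPE_L2CACHE) == 0);
	assert(mindev == 1);	/* so does a cache grouping */
	assert(is_grouping("da0", &mindev) == NULL);	/* plain leaf device */
}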
@@ -710,14 +877,21 @@ is_grouping(const char *type, int *mindev)
nvlist_t *
construct_spec(int argc, char **argv)
{
- nvlist_t *nvroot, *nv, **top, **spares;
- int t, toplevels, mindev, nspares;
+ nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
+ int t, toplevels, mindev, nspares, nlogs, nl2cache;
const char *type;
+ uint64_t is_log;
+ boolean_t seen_logs;
top = NULL;
toplevels = 0;
spares = NULL;
+ l2cache = NULL;
nspares = 0;
+ nlogs = 0;
+ nl2cache = 0;
+ is_log = B_FALSE;
+ seen_logs = B_FALSE;
while (argc > 0) {
nv = NULL;
@@ -730,12 +904,56 @@ construct_spec(int argc, char **argv)
nvlist_t **child = NULL;
int c, children = 0;
- if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
- spares != NULL) {
- (void) fprintf(stderr, gettext("invalid vdev "
- "specification: 'spare' can be "
- "specified only once\n"));
- return (NULL);
+ if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
+ if (spares != NULL) {
+ (void) fprintf(stderr,
+ gettext("invalid vdev "
+ "specification: 'spare' can be "
+ "specified only once\n"));
+ return (NULL);
+ }
+ is_log = B_FALSE;
+ }
+
+ if (strcmp(type, VDEV_TYPE_LOG) == 0) {
+ if (seen_logs) {
+ (void) fprintf(stderr,
+ gettext("invalid vdev "
+ "specification: 'log' can be "
+ "specified only once\n"));
+ return (NULL);
+ }
+ seen_logs = B_TRUE;
+ is_log = B_TRUE;
+ argc--;
+ argv++;
+ /*
+ * A log is not a real grouping device.
+ * We just set is_log and continue.
+ */
+ continue;
+ }
+
+ if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+ if (l2cache != NULL) {
+ (void) fprintf(stderr,
+ gettext("invalid vdev "
+ "specification: 'cache' can be "
+ "specified only once\n"));
+ return (NULL);
+ }
+ is_log = B_FALSE;
+ }
+
+ if (is_log) {
+ if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+ (void) fprintf(stderr,
+ gettext("invalid vdev "
+ "specification: unsupported 'log' "
+ "device: %s\n"), type);
+ return (NULL);
+ }
+ nlogs++;
}
for (c = 1; c < argc; c++) {
@@ -746,7 +964,8 @@ construct_spec(int argc, char **argv)
children * sizeof (nvlist_t *));
if (child == NULL)
zpool_no_memory();
- if ((nv = make_leaf_vdev(argv[c])) == NULL)
+ if ((nv = make_leaf_vdev(argv[c], B_FALSE))
+ == NULL)
return (NULL);
child[children - 1] = nv;
}
@@ -765,11 +984,17 @@ construct_spec(int argc, char **argv)
spares = child;
nspares = children;
continue;
+ } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+ l2cache = child;
+ nl2cache = children;
+ continue;
} else {
verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
0) == 0);
verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
type) == 0);
+ verify(nvlist_add_uint64(nv,
+ ZPOOL_CONFIG_IS_LOG, is_log) == 0);
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
verify(nvlist_add_uint64(nv,
ZPOOL_CONFIG_NPARITY,
@@ -788,8 +1013,10 @@ construct_spec(int argc, char **argv)
* We have a device. Pass off to make_leaf_vdev() to
* construct the appropriate nvlist describing the vdev.
*/
- if ((nv = make_leaf_vdev(argv[0])) == NULL)
+ if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
return (NULL);
+ if (is_log)
+ nlogs++;
argc--;
argv++;
}
@@ -801,13 +1028,19 @@ construct_spec(int argc, char **argv)
top[toplevels - 1] = nv;
}
- if (toplevels == 0 && nspares == 0) {
+ if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
(void) fprintf(stderr, gettext("invalid vdev "
"specification: at least one toplevel vdev must be "
"specified\n"));
return (NULL);
}
+ if (seen_logs && nlogs == 0) {
+ (void) fprintf(stderr, gettext("invalid vdev specification: "
+ "log requires at least 1 device\n"));
+ return (NULL);
+ }
+
/*
* Finally, create nvroot and add all top-level vdevs to it.
*/
@@ -819,18 +1052,26 @@ construct_spec(int argc, char **argv)
if (nspares != 0)
verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
spares, nspares) == 0);
+ if (nl2cache != 0)
+ verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+ l2cache, nl2cache) == 0);
for (t = 0; t < toplevels; t++)
nvlist_free(top[t]);
for (t = 0; t < nspares; t++)
nvlist_free(spares[t]);
+ for (t = 0; t < nl2cache; t++)
+ nvlist_free(l2cache[t]);
if (spares)
free(spares);
+ if (l2cache)
+ free(l2cache);
free(top);
return (nvroot);
}
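/*
 * Usage sketch (illustrative only; assumes da0-da3 name existing GEOM
 * providers): these tokens correspond to the tail of
 * "zpool create <pool> mirror da0 da1 log da2 cache da3" and yield a root
 * nvlist with a mirror top-level vdev, a second top-level leaf marked
 * ZPOOL_CONFIG_IS_LOG, and da3 recorded in the ZPOOL_CONFIG_L2CACHE array.
 */
static nvlist_t *
construct_spec_example(void)
{
	char *spec[] = { "mirror", "da0", "da1", "log", "da2", "cache", "da3" };

	return (construct_spec(7, spec));
}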
+
/*
* Get and validate the contents of the given vdev specification. This ensures
* that the nvlist returned is well-formed, that all the devices exist, and that
@@ -842,11 +1083,11 @@ construct_spec(int argc, char **argv)
* added, even if they appear in use.
*/
nvlist_t *
-make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
- boolean_t isreplacing, int argc, char **argv)
+make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
+ boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
{
nvlist_t *newroot;
-
+ nvlist_t *poolconfig = NULL;
is_force = force;
/*
@@ -857,6 +1098,9 @@ make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
if ((newroot = construct_spec(argc, argv)) == NULL)
return (NULL);
+ if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
+ return (NULL);
+
/*
 * Validate each device to make sure that it's not shared with another
* subsystem. We do this even if 'force' is set, because there are some
diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
index 5d9f028..b7ca302 100644
--- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c
+++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* The objective of this program is to provide a DMU/ZAP/SPA stress test
* that runs entirely in userland, is easy to use, and easy to extend.
@@ -92,6 +90,7 @@
#include <sys/zio_compress.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
+#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
#include <sys/dsl_prop.h>
#include <sys/refcount.h>
@@ -128,8 +127,18 @@ static char *zopt_dir = "/tmp";
static uint64_t zopt_time = 300; /* 5 minutes */
static int zopt_maxfaults;
+typedef struct ztest_block_tag {
+ uint64_t bt_objset;
+ uint64_t bt_object;
+ uint64_t bt_offset;
+ uint64_t bt_txg;
+ uint64_t bt_thread;
+ uint64_t bt_seq;
+} ztest_block_tag_t;
+
typedef struct ztest_args {
- char *za_pool;
+ char za_pool[MAXNAMELEN];
+ spa_t *za_spa;
objset_t *za_os;
zilog_t *za_zilog;
thread_t za_thread;
@@ -142,6 +151,13 @@ typedef struct ztest_args {
hrtime_t za_stop;
hrtime_t za_kill;
traverse_handle_t *za_th;
+ /*
+ * Thread-local variables can go here to aid debugging.
+ */
+ ztest_block_tag_t za_rbt;
+ ztest_block_tag_t za_wbt;
+ dmu_object_info_t za_doi;
+ dmu_buf_t *za_dbuf;
} ztest_args_t;
typedef void ztest_func_t(ztest_args_t *);
@@ -160,14 +176,16 @@ ztest_func_t ztest_dmu_objset_create_destroy;
ztest_func_t ztest_dmu_snapshot_create_destroy;
ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
+ztest_func_t ztest_spa_rename;
ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
+ztest_func_t ztest_vdev_aux_add_remove;
ztest_func_t ztest_scrub;
-ztest_func_t ztest_spa_rename;
typedef struct ztest_info {
ztest_func_t *zi_func; /* test function */
+ uint64_t zi_iters; /* iterations per execution */
uint64_t *zi_interval; /* execute every <interval> seconds */
uint64_t zi_calls; /* per-pass count */
uint64_t zi_call_time; /* per-pass time */
@@ -181,22 +199,23 @@ uint64_t zopt_sometimes = 10; /* every 10 seconds */
uint64_t zopt_rarely = 60; /* every 60 seconds */
ztest_info_t ztest_info[] = {
- { ztest_dmu_read_write, &zopt_always },
- { ztest_dmu_write_parallel, &zopt_always },
- { ztest_dmu_object_alloc_free, &zopt_always },
- { ztest_zap, &zopt_always },
- { ztest_zap_parallel, &zopt_always },
- { ztest_traverse, &zopt_often },
- { ztest_dsl_prop_get_set, &zopt_sometimes },
- { ztest_dmu_objset_create_destroy, &zopt_sometimes },
- { ztest_dmu_snapshot_create_destroy, &zopt_rarely },
- { ztest_spa_create_destroy, &zopt_sometimes },
- { ztest_fault_inject, &zopt_sometimes },
- { ztest_spa_rename, &zopt_rarely },
- { ztest_vdev_attach_detach, &zopt_rarely },
- { ztest_vdev_LUN_growth, &zopt_rarely },
- { ztest_vdev_add_remove, &zopt_vdevtime },
- { ztest_scrub, &zopt_vdevtime },
+ { ztest_dmu_read_write, 1, &zopt_always },
+ { ztest_dmu_write_parallel, 30, &zopt_always },
+ { ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_zap, 30, &zopt_always },
+ { ztest_zap_parallel, 100, &zopt_always },
+ { ztest_traverse, 1, &zopt_often },
+ { ztest_dsl_prop_get_set, 1, &zopt_sometimes },
+ { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
+ { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
+ { ztest_spa_create_destroy, 1, &zopt_sometimes },
+ { ztest_fault_inject, 1, &zopt_sometimes },
+ { ztest_spa_rename, 1, &zopt_rarely },
+ { ztest_vdev_attach_detach, 1, &zopt_rarely },
+ { ztest_vdev_LUN_growth, 1, &zopt_rarely },
+ { ztest_vdev_add_remove, 1, &zopt_vdevtime },
+ { ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
+ { ztest_scrub, 1, &zopt_vdevtime },
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
@@ -210,34 +229,27 @@ typedef struct ztest_shared {
mutex_t zs_vdev_lock;
rwlock_t zs_name_lock;
uint64_t zs_vdev_primaries;
+ uint64_t zs_vdev_aux;
uint64_t zs_enospc_count;
hrtime_t zs_start_time;
hrtime_t zs_stop_time;
uint64_t zs_alloc;
uint64_t zs_space;
- uint64_t zs_txg;
ztest_info_t zs_info[ZTEST_FUNCS];
mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
uint64_t zs_seq[ZTEST_SYNC_LOCKS];
} ztest_shared_t;
-typedef struct ztest_block_tag {
- uint64_t bt_objset;
- uint64_t bt_object;
- uint64_t bt_offset;
- uint64_t bt_txg;
- uint64_t bt_thread;
- uint64_t bt_seq;
-} ztest_block_tag_t;
-
static char ztest_dev_template[] = "%s/%s.%llua";
+static char ztest_aux_template[] = "%s/%s.%s.%llu";
static ztest_shared_t *ztest_shared;
static int ztest_random_fd;
static int ztest_dump_core = 1;
-extern uint64_t zio_gang_bang;
-extern uint16_t zio_zil_fail_shift;
+static boolean_t ztest_exiting;
+
+extern uint64_t metaslab_gang_bang;
#define ZTEST_DIROBJ 1
#define ZTEST_MICROZAP_OBJ 2
@@ -357,7 +369,7 @@ usage(boolean_t requested)
FILE *fp = requested ? stdout : stderr;
nicenum(zopt_vdev_size, nice_vdev_size);
- nicenum(zio_gang_bang, nice_gang_bang);
+ nicenum(metaslab_gang_bang, nice_gang_bang);
(void) fprintf(fp, "Usage: %s\n"
"\t[-v vdevs (default: %llu)]\n"
@@ -377,26 +389,24 @@ usage(boolean_t requested)
"\t[-E(xisting)] (use existing pool instead of creating new one)\n"
"\t[-T time] total run time (default: %llu sec)\n"
"\t[-P passtime] time per pass (default: %llu sec)\n"
- "\t[-z zil failure rate (default: fail every 2^%llu allocs)]\n"
"\t[-h] (print help)\n"
"",
cmdname,
- (u_longlong_t)zopt_vdevs, /* -v */
- nice_vdev_size, /* -s */
- zopt_ashift, /* -a */
- zopt_mirrors, /* -m */
- zopt_raidz, /* -r */
- zopt_raidz_parity, /* -R */
- zopt_datasets, /* -d */
- zopt_threads, /* -t */
- nice_gang_bang, /* -g */
- zopt_init, /* -i */
- (u_longlong_t)zopt_killrate, /* -k */
- zopt_pool, /* -p */
- zopt_dir, /* -f */
- (u_longlong_t)zopt_time, /* -T */
- (u_longlong_t)zopt_passtime, /* -P */
- (u_longlong_t)zio_zil_fail_shift); /* -z */
+ (u_longlong_t)zopt_vdevs, /* -v */
+ nice_vdev_size, /* -s */
+ zopt_ashift, /* -a */
+ zopt_mirrors, /* -m */
+ zopt_raidz, /* -r */
+ zopt_raidz_parity, /* -R */
+ zopt_datasets, /* -d */
+ zopt_threads, /* -t */
+ nice_gang_bang, /* -g */
+ zopt_init, /* -i */
+ (u_longlong_t)zopt_killrate, /* -k */
+ zopt_pool, /* -p */
+ zopt_dir, /* -f */
+ (u_longlong_t)zopt_time, /* -T */
+ (u_longlong_t)zopt_passtime); /* -P */
exit(requested ? 0 : 1);
}
@@ -431,91 +441,84 @@ process_options(int argc, char **argv)
progname = argv[0];
/* By default, test gang blocks for blocks 32K and greater */
- zio_gang_bang = 32 << 10;
-
- /* Default value, fail every 32nd allocation */
- zio_zil_fail_shift = 5;
+ metaslab_gang_bang = 32 << 10;
while ((opt = getopt(argc, argv,
- "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:z:h")) != EOF) {
+ "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:h")) != EOF) {
value = 0;
switch (opt) {
- case 'v':
- case 's':
- case 'a':
- case 'm':
- case 'r':
- case 'R':
- case 'd':
- case 't':
- case 'g':
- case 'i':
- case 'k':
- case 'T':
- case 'P':
- case 'z':
+ case 'v':
+ case 's':
+ case 'a':
+ case 'm':
+ case 'r':
+ case 'R':
+ case 'd':
+ case 't':
+ case 'g':
+ case 'i':
+ case 'k':
+ case 'T':
+ case 'P':
value = nicenumtoull(optarg);
}
switch (opt) {
- case 'v':
+ case 'v':
zopt_vdevs = value;
break;
- case 's':
+ case 's':
zopt_vdev_size = MAX(SPA_MINDEVSIZE, value);
break;
- case 'a':
+ case 'a':
zopt_ashift = value;
break;
- case 'm':
+ case 'm':
zopt_mirrors = value;
break;
- case 'r':
+ case 'r':
zopt_raidz = MAX(1, value);
break;
- case 'R':
+ case 'R':
zopt_raidz_parity = MIN(MAX(value, 1), 2);
break;
- case 'd':
+ case 'd':
zopt_datasets = MAX(1, value);
break;
- case 't':
+ case 't':
zopt_threads = MAX(1, value);
break;
- case 'g':
- zio_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value);
+ case 'g':
+ metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value);
break;
- case 'i':
+ case 'i':
zopt_init = value;
break;
- case 'k':
+ case 'k':
zopt_killrate = value;
break;
- case 'p':
+ case 'p':
zopt_pool = strdup(optarg);
break;
- case 'f':
+ case 'f':
zopt_dir = strdup(optarg);
break;
- case 'V':
+ case 'V':
zopt_verbose++;
break;
- case 'E':
+ case 'E':
zopt_init = 0;
break;
- case 'T':
+ case 'T':
zopt_time = value;
break;
- case 'P':
+ case 'P':
zopt_passtime = MAX(1, value);
break;
- case 'z':
- zio_zil_fail_shift = MIN(value, 16);
- break;
- case 'h':
+ case 'h':
usage(B_TRUE);
break;
- case '?':
- default:
+ case '?':
+ default:
usage(B_FALSE);
break;
}
@@ -536,51 +539,58 @@ ztest_get_ashift(void)
}
static nvlist_t *
-make_vdev_file(size_t size)
+make_vdev_file(char *path, char *aux, size_t size, uint64_t ashift)
{
- char dev_name[MAXPATHLEN];
+ char pathbuf[MAXPATHLEN];
uint64_t vdev;
- uint64_t ashift = ztest_get_ashift();
- int fd;
nvlist_t *file;
- if (size == 0) {
- (void) snprintf(dev_name, sizeof (dev_name), "%s",
- "/dev/bogus");
- } else {
- vdev = ztest_shared->zs_vdev_primaries++;
- (void) sprintf(dev_name, ztest_dev_template,
- zopt_dir, zopt_pool, vdev);
+ if (ashift == 0)
+ ashift = ztest_get_ashift();
+
+ if (path == NULL) {
+ path = pathbuf;
+
+ if (aux != NULL) {
+ vdev = ztest_shared->zs_vdev_aux;
+ (void) sprintf(path, ztest_aux_template,
+ zopt_dir, zopt_pool, aux, vdev);
+ } else {
+ vdev = ztest_shared->zs_vdev_primaries++;
+ (void) sprintf(path, ztest_dev_template,
+ zopt_dir, zopt_pool, vdev);
+ }
+ }
- fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ if (size != 0) {
+ int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
if (fd == -1)
- fatal(1, "can't open %s", dev_name);
+ fatal(1, "can't open %s", path);
if (ftruncate(fd, size) != 0)
- fatal(1, "can't ftruncate %s", dev_name);
+ fatal(1, "can't ftruncate %s", path);
(void) close(fd);
}
VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
+ VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
return (file);
}
static nvlist_t *
-make_vdev_raidz(size_t size, int r)
+make_vdev_raidz(char *path, char *aux, size_t size, uint64_t ashift, int r)
{
nvlist_t *raidz, **child;
int c;
if (r < 2)
- return (make_vdev_file(size));
-
+ return (make_vdev_file(path, aux, size, ashift));
child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
for (c = 0; c < r; c++)
- child[c] = make_vdev_file(size);
+ child[c] = make_vdev_file(path, aux, size, ashift);
VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
@@ -599,18 +609,19 @@ make_vdev_raidz(size_t size, int r)
}
static nvlist_t *
-make_vdev_mirror(size_t size, int r, int m)
+make_vdev_mirror(char *path, char *aux, size_t size, uint64_t ashift,
+ int r, int m)
{
nvlist_t *mirror, **child;
int c;
if (m < 1)
- return (make_vdev_raidz(size, r));
+ return (make_vdev_raidz(path, aux, size, ashift, r));
child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
for (c = 0; c < m; c++)
- child[c] = make_vdev_raidz(size, r);
+ child[c] = make_vdev_raidz(path, aux, size, ashift, r);
VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
@@ -627,7 +638,8 @@ make_vdev_mirror(size_t size, int r, int m)
}
static nvlist_t *
-make_vdev_root(size_t size, int r, int m, int t)
+make_vdev_root(char *path, char *aux, size_t size, uint64_t ashift,
+ int log, int r, int m, int t)
{
nvlist_t *root, **child;
int c;
@@ -636,12 +648,15 @@ make_vdev_root(size_t size, int r, int m, int t)
child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
- for (c = 0; c < t; c++)
- child[c] = make_vdev_mirror(size, r, m);
+ for (c = 0; c < t; c++) {
+ child[c] = make_vdev_mirror(path, aux, size, ashift, r, m);
+ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ log) == 0);
+ }
VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
- VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+ VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
child, t) == 0);
for (c = 0; c < t; c++)
@@ -785,8 +800,8 @@ ztest_spa_create_destroy(ztest_args_t *za)
/*
* Attempt to create using a bad file.
*/
- nvroot = make_vdev_root(0, 0, 0, 1);
- error = spa_create("ztest_bad_file", nvroot, NULL);
+ nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
+ error = spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL);
nvlist_free(nvroot);
if (error != ENOENT)
fatal(0, "spa_create(bad_file) = %d", error);
@@ -794,8 +809,8 @@ ztest_spa_create_destroy(ztest_args_t *za)
/*
* Attempt to create using a bad mirror.
*/
- nvroot = make_vdev_root(0, 0, 2, 1);
- error = spa_create("ztest_bad_mirror", nvroot, NULL);
+ nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1);
+ error = spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL);
nvlist_free(nvroot);
if (error != ENOENT)
fatal(0, "spa_create(bad_mirror) = %d", error);
@@ -805,8 +820,8 @@ ztest_spa_create_destroy(ztest_args_t *za)
* what's in the nvroot; we should fail with EEXIST.
*/
(void) rw_rdlock(&ztest_shared->zs_name_lock);
- nvroot = make_vdev_root(0, 0, 0, 1);
- error = spa_create(za->za_pool, nvroot, NULL);
+ nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
+ error = spa_create(za->za_pool, nvroot, NULL, NULL, NULL);
nvlist_free(nvroot);
if (error != EEXIST)
fatal(0, "spa_create(whatever) = %d", error);
@@ -823,30 +838,48 @@ ztest_spa_create_destroy(ztest_args_t *za)
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
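With the reworked helpers above, a layout is composed bottom-up: make_vdev_root() builds t top-level children, each a mirror of m raidz groups of r file vdevs, and the new path, aux, ashift and log parameters thread straight down to make_vdev_file(). A sketch of a typical call in the same argument order as the new signature (spa, zopt_* and error come from the surrounding test context; the values are illustrative):

	/* path, aux, size, ashift, log, raidz width, mirror ways, top-levels */
	nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
	    B_FALSE, zopt_raidz, zopt_mirrors, 1);
	error = spa_vdev_add(spa, nvroot);
	nvlist_free(nvroot);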
+static vdev_t *
+vdev_lookup_by_path(vdev_t *vd, const char *path)
+{
+ vdev_t *mvd;
+
+ if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
+ return (vd);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
+ NULL)
+ return (mvd);
+
+ return (NULL);
+}
+
/*
* Verify that vdev_add() works as expected.
*/
void
ztest_vdev_add_remove(ztest_args_t *za)
{
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
nvlist_t *nvroot;
int error;
- if (zopt_verbose >= 6)
- (void) printf("adding vdev\n");
-
(void) mutex_lock(&ztest_shared->zs_vdev_lock);
- spa_config_enter(spa, RW_READER, FTAG);
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
ztest_shared->zs_vdev_primaries =
spa->spa_root_vdev->vdev_children * leaves;
- spa_config_exit(spa, FTAG);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+
+ /*
+ * Make 1/4 of the devices be log devices.
+ */
+ nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
+ ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
- nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
error = spa_vdev_add(spa, nvroot);
nvlist_free(nvroot);
@@ -856,37 +889,86 @@ ztest_vdev_add_remove(ztest_args_t *za)
ztest_record_enospc("spa_vdev_add");
else if (error != 0)
fatal(0, "spa_vdev_add() = %d", error);
-
- if (zopt_verbose >= 6)
- (void) printf("spa_vdev_add = %d, as expected\n", error);
}
-static vdev_t *
-vdev_lookup_by_path(vdev_t *vd, const char *path)
+/*
+ * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
+ */
+void
+ztest_vdev_aux_add_remove(ztest_args_t *za)
{
- int c;
- vdev_t *mvd;
+ spa_t *spa = za->za_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
+ spa_aux_vdev_t *sav;
+ char *aux;
+ uint64_t guid = 0;
+ int error;
- if (vd->vdev_path != NULL) {
- if (vd->vdev_wholedisk == 1) {
- /*
- * For whole disks, the internal path has 's0', but the
- * path passed in by the user doesn't.
- */
- if (strlen(path) == strlen(vd->vdev_path) - 2 &&
- strncmp(path, vd->vdev_path, strlen(path)) == 0)
- return (vd);
- } else if (strcmp(path, vd->vdev_path) == 0) {
- return (vd);
+ if (ztest_random(2) == 0) {
+ sav = &spa->spa_spares;
+ aux = ZPOOL_CONFIG_SPARES;
+ } else {
+ sav = &spa->spa_l2cache;
+ aux = ZPOOL_CONFIG_L2CACHE;
+ }
+
+ (void) mutex_lock(&ztest_shared->zs_vdev_lock);
+
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+ if (sav->sav_count != 0 && ztest_random(4) == 0) {
+ /*
+ * Pick a random device to remove.
+ */
+ guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
+ } else {
+ /*
+ * Find an unused device we can add.
+ */
+ ztest_shared->zs_vdev_aux = 0;
+ for (;;) {
+ char path[MAXPATHLEN];
+ int c;
+ (void) sprintf(path, ztest_aux_template, zopt_dir,
+ zopt_pool, aux, ztest_shared->zs_vdev_aux);
+ for (c = 0; c < sav->sav_count; c++)
+ if (strcmp(sav->sav_vdevs[c]->vdev_path,
+ path) == 0)
+ break;
+ if (c == sav->sav_count &&
+ vdev_lookup_by_path(rvd, path) == NULL)
+ break;
+ ztest_shared->zs_vdev_aux++;
}
}
- for (c = 0; c < vd->vdev_children; c++)
- if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
- NULL)
- return (mvd);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
- return (NULL);
+ if (guid == 0) {
+ /*
+ * Add a new device.
+ */
+ nvlist_t *nvroot = make_vdev_root(NULL, aux,
+ (zopt_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
+ error = spa_vdev_add(spa, nvroot);
+ if (error != 0)
+ fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
+ nvlist_free(nvroot);
+ } else {
+ /*
+ * Remove an existing device. Sometimes, dirty its
+ * vdev state first to make sure we handle removal
+ * of devices that have pending state changes.
+ */
+ if (ztest_random(2) == 0)
+ (void) vdev_online(spa, guid, B_FALSE, NULL);
+
+ error = spa_vdev_remove(spa, guid, B_FALSE);
+ if (error != 0 && error != EBUSY)
+ fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
+ }
+
+ (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
}
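The "find an unused device" loop above keeps generating candidate paths until one is neither among the existing aux vdevs nor present in the main vdev tree. A stand-alone sketch of the same scan over a plain array of path strings (the "%s/%s.%s.%d" format is illustrative, not the real ztest_aux_template):

#include <stdio.h>
#include <string.h>

static int
sketch_first_unused_aux(const char **existing, int count, const char *dir,
    const char *pool, const char *aux, char *out, size_t outlen)
{
	for (int idx = 0; ; idx++) {
		int c;

		(void) snprintf(out, outlen, "%s/%s.%s.%d",
		    dir, pool, aux, idx);
		for (c = 0; c < count; c++)
			if (strcmp(existing[c], out) == 0)
				break;
		if (c == count)
			return (idx);	/* first path not already in use */
	}
}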
/*
@@ -895,22 +977,26 @@ vdev_lookup_by_path(vdev_t *vd, const char *path)
void
ztest_vdev_attach_detach(ztest_args_t *za)
{
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
+ spa_aux_vdev_t *sav = &spa->spa_spares;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *pvd;
- nvlist_t *root, *file;
+ nvlist_t *root;
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
uint64_t leaf, top;
uint64_t ashift = ztest_get_ashift();
+ uint64_t oldguid;
size_t oldsize, newsize;
char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
int replacing;
+ int oldvd_has_siblings = B_FALSE;
+ int newvd_is_spare = B_FALSE;
+ int oldvd_is_log;
int error, expected_error;
- int fd;
(void) mutex_lock(&ztest_shared->zs_vdev_lock);
- spa_config_enter(spa, RW_READER, FTAG);
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
/*
* Decide whether to do an attach or a replace.
@@ -928,39 +1014,70 @@ ztest_vdev_attach_detach(ztest_args_t *za)
leaf = ztest_random(leaves);
/*
- * Generate the path to this leaf. The filename will end with 'a'.
- * We'll alternate replacements with a filename that ends with 'b'.
+ * Locate this vdev.
*/
- (void) snprintf(oldpath, sizeof (oldpath),
- ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
-
- bcopy(oldpath, newpath, MAXPATHLEN);
+ oldvd = rvd->vdev_child[top];
+ if (zopt_mirrors >= 1)
+ oldvd = oldvd->vdev_child[leaf / zopt_raidz];
+ if (zopt_raidz > 1)
+ oldvd = oldvd->vdev_child[leaf % zopt_raidz];
/*
- * If the 'a' file isn't part of the pool, the 'b' file must be.
+ * If we're already doing an attach or replace, oldvd may be a
+ * mirror vdev -- in which case, pick a random child.
*/
- if (vdev_lookup_by_path(rvd, oldpath) == NULL)
- oldpath[strlen(oldpath) - 1] = 'b';
- else
- newpath[strlen(newpath) - 1] = 'b';
+ while (oldvd->vdev_children != 0) {
+ oldvd_has_siblings = B_TRUE;
+ ASSERT(oldvd->vdev_children == 2);
+ oldvd = oldvd->vdev_child[ztest_random(2)];
+ }
+
+ oldguid = oldvd->vdev_guid;
+ oldsize = vdev_get_rsize(oldvd);
+ oldvd_is_log = oldvd->vdev_top->vdev_islog;
+ (void) strcpy(oldpath, oldvd->vdev_path);
+ pvd = oldvd->vdev_parent;
/*
- * Now oldpath represents something that's already in the pool,
- * and newpath is the thing we'll try to attach.
+ * If oldvd has siblings, then half of the time, detach it.
*/
- oldvd = vdev_lookup_by_path(rvd, oldpath);
- newvd = vdev_lookup_by_path(rvd, newpath);
- ASSERT(oldvd != NULL);
- pvd = oldvd->vdev_parent;
+ if (oldvd_has_siblings && ztest_random(2) == 0) {
+ spa_config_exit(spa, SCL_VDEV, FTAG);
+ error = spa_vdev_detach(spa, oldguid, B_FALSE);
+ if (error != 0 && error != ENODEV && error != EBUSY)
+ fatal(0, "detach (%s) returned %d",
+ oldpath, error);
+ (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+ return;
+ }
/*
- * Make newsize a little bigger or smaller than oldsize.
- * If it's smaller, the attach should fail.
- * If it's larger, and we're doing a replace,
- * we should get dynamic LUN growth when we're done.
+ * For the new vdev, choose with equal probability between the two
+ * standard paths (ending in either 'a' or 'b') or a random hot spare.
*/
- oldsize = vdev_get_rsize(oldvd);
- newsize = 10 * oldsize / (9 + ztest_random(3));
+ if (sav->sav_count != 0 && ztest_random(3) == 0) {
+ newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
+ newvd_is_spare = B_TRUE;
+ (void) strcpy(newpath, newvd->vdev_path);
+ } else {
+ (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
+ zopt_dir, zopt_pool, top * leaves + leaf);
+ if (ztest_random(2) == 0)
+ newpath[strlen(newpath) - 1] = 'b';
+ newvd = vdev_lookup_by_path(rvd, newpath);
+ }
+
+ if (newvd) {
+ newsize = vdev_get_rsize(newvd);
+ } else {
+ /*
+ * Make newsize a little bigger or smaller than oldsize.
+ * If it's smaller, the attach should fail.
+ * If it's larger, and we're doing a replace,
+ * we should get dynamic LUN growth when we're done.
+ */
+ newsize = 10 * oldsize / (9 + ztest_random(3));
+ }
/*
* If pvd is not a mirror or root, the attach should fail with ENOTSUP,
@@ -970,12 +1087,17 @@ ztest_vdev_attach_detach(ztest_args_t *za)
*
* If newvd is too small, it should fail with EOVERFLOW.
*/
- if (newvd != NULL)
- expected_error = EBUSY;
- else if (pvd->vdev_ops != &vdev_mirror_ops &&
- pvd->vdev_ops != &vdev_root_ops &&
- (!replacing || pvd->vdev_ops == &vdev_replacing_ops))
+ if (pvd->vdev_ops != &vdev_mirror_ops &&
+ pvd->vdev_ops != &vdev_root_ops && (!replacing ||
+ pvd->vdev_ops == &vdev_replacing_ops ||
+ pvd->vdev_ops == &vdev_spare_ops))
+ expected_error = ENOTSUP;
+ else if (newvd_is_spare && (!replacing || oldvd_is_log))
expected_error = ENOTSUP;
+ else if (newvd == oldvd)
+ expected_error = replacing ? 0 : EBUSY;
+ else if (vdev_lookup_by_path(rvd, newpath) != NULL)
+ expected_error = EBUSY;
else if (newsize < oldsize)
expected_error = EOVERFLOW;
else if (ashift > oldvd->vdev_top->vdev_ashift)
@@ -983,36 +1105,16 @@ ztest_vdev_attach_detach(ztest_args_t *za)
else
expected_error = 0;
- /*
- * If newvd isn't already part of the pool, create it.
- */
- if (newvd == NULL) {
- fd = open(newpath, O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (fd == -1)
- fatal(1, "can't open %s", newpath);
- if (ftruncate(fd, newsize) != 0)
- fatal(1, "can't ftruncate %s", newpath);
- (void) close(fd);
- }
-
- spa_config_exit(spa, FTAG);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
/*
* Build the nvlist describing newpath.
*/
- VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, newpath) == 0);
- VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
+ root = make_vdev_root(newpath, NULL, newvd == NULL ? newsize : 0,
+ ashift, 0, 0, 0, 1);
- VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
- VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
- &file, 1) == 0);
-
- error = spa_vdev_attach(spa, oldvd->vdev_guid, root, replacing);
+ error = spa_vdev_attach(spa, oldguid, root, replacing);
- nvlist_free(file);
nvlist_free(root);
/*
@@ -1027,12 +1129,15 @@ ztest_vdev_attach_detach(ztest_args_t *za)
/*
* If someone grew the LUN, the replacement may be too small.
*/
- if (error == EOVERFLOW)
+ if (error == EOVERFLOW || error == EBUSY)
expected_error = error;
- if (error != expected_error) {
- fatal(0, "attach (%s, %s, %d) returned %d, expected %d",
- oldpath, newpath, replacing, error, expected_error);
+ /* XXX workaround 6690467 */
+ if (error != expected_error && expected_error != EBUSY) {
+ fatal(0, "attach (%s %llu, %s %llu, %d) "
+ "returned %d, expected %d",
+ oldpath, (longlong_t)oldsize, newpath,
+ (longlong_t)newsize, replacing, error, expected_error);
}
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
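With ztest_random(3) returning 0, 1 or 2, the fallback newsize = 10 * oldsize / (9 + ztest_random(3)) comes out to roughly 111%, 100% or 91% of oldsize, so a freshly created replacement is deliberately a little larger than, equal to, or a little smaller than the original; the smaller case is what exercises the EOVERFLOW expectation set earlier in this function.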
@@ -1045,7 +1150,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
void
ztest_vdev_LUN_growth(ztest_args_t *za)
{
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
char dev_name[MAXPATHLEN];
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
uint64_t vdev;
@@ -1057,9 +1162,9 @@ ztest_vdev_LUN_growth(ztest_args_t *za)
/*
* Pick a random leaf vdev.
*/
- spa_config_enter(spa, RW_READER, FTAG);
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
- spa_config_exit(spa, FTAG);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
@@ -1088,14 +1193,14 @@ ztest_vdev_LUN_growth(ztest_args_t *za)
/* ARGSUSED */
static void
-ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
+ztest_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
{
/*
* Create the directory object.
*/
VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
- DMU_OT_UINT64_OTHER, sizeof (ztest_block_tag_t), tx) == 0);
+ DMU_OT_UINT64_OTHER, 5 * sizeof (ztest_block_tag_t), tx) == 0);
VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
@@ -1104,26 +1209,26 @@ ztest_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
}
-/* ARGSUSED */
static int
ztest_destroy_cb(char *name, void *arg)
{
+ ztest_args_t *za = arg;
objset_t *os;
- dmu_object_info_t doi;
+ dmu_object_info_t *doi = &za->za_doi;
int error;
/*
* Verify that the dataset contains a directory object.
*/
error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_STANDARD | DS_MODE_READONLY, &os);
+ DS_MODE_USER | DS_MODE_READONLY, &os);
ASSERT3U(error, ==, 0);
- error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
+ error = dmu_object_info(os, ZTEST_DIROBJ, doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
ASSERT3U(error, ==, 0);
- ASSERT3U(doi.doi_type, ==, DMU_OT_UINT64_OTHER);
- ASSERT3S(doi.doi_physical_blks, >=, 0);
+ ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
+ ASSERT3S(doi->doi_physical_blks, >=, 0);
}
dmu_objset_close(os);
@@ -1131,7 +1236,11 @@ ztest_destroy_cb(char *name, void *arg)
* Destroy the dataset.
*/
error = dmu_objset_destroy(name);
- ASSERT3U(error, ==, 0);
+ if (error) {
+ (void) dmu_objset_open(name, DMU_OST_OTHER,
+ DS_MODE_USER | DS_MODE_READONLY, &os);
+ fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
+ }
return (0);
}
@@ -1171,9 +1280,9 @@ void
ztest_dmu_objset_create_destroy(ztest_args_t *za)
{
int error;
- objset_t *os;
+ objset_t *os, *os2;
char name[100];
- int mode, basemode, expected_error;
+ int basemode, expected_error;
zilog_t *zilog;
uint64_t seq;
uint64_t objects;
@@ -1183,9 +1292,9 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
(void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
(u_longlong_t)za->za_instance);
- basemode = DS_MODE_LEVEL(za->za_instance);
- if (basemode == DS_MODE_NONE)
- basemode++;
+ basemode = DS_MODE_TYPE(za->za_instance);
+ if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
+ basemode = DS_MODE_USER;
/*
* If this dataset exists from a previous run, process its replay log
@@ -1193,9 +1302,9 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
* (invoked from ztest_destroy_cb() below) should just throw it away.
*/
if (ztest_random(2) == 0 &&
- dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_PRIMARY, &os) == 0) {
+ dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
zr.zr_os = os;
- zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector);
+ zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL);
dmu_objset_close(os);
}
@@ -1204,7 +1313,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
* create lying around from a previous run. If so, destroy it
* and all of its snapshots.
*/
- (void) dmu_objset_find(name, ztest_destroy_cb, NULL,
+ (void) dmu_objset_find(name, ztest_destroy_cb, za,
DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
/*
@@ -1218,8 +1327,8 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
/*
* Verify that we can create a new dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, ztest_create_cb,
- NULL);
+ error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
+ ztest_create_cb, NULL);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_create");
@@ -1274,26 +1383,29 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
/*
* Verify that we cannot create an existing dataset.
*/
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL, NULL, NULL);
+ error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL);
if (error != EEXIST)
fatal(0, "created existing dataset, error = %d", error);
/*
- * Verify that multiple dataset opens are allowed, but only when
+ * Verify that multiple dataset holds are allowed, but only when
* the new access mode is compatible with the base mode.
- * We use a mixture of typed and typeless opens, and when the
- * open succeeds, verify that the discovered type is correct.
- */
- for (mode = DS_MODE_STANDARD; mode < DS_MODE_LEVELS; mode++) {
- objset_t *os2;
- error = dmu_objset_open(name, DMU_OST_OTHER, mode, &os2);
- expected_error = (basemode + mode < DS_MODE_LEVELS) ? 0 : EBUSY;
- if (error != expected_error)
- fatal(0, "dmu_objset_open('%s') = %d, expected %d",
- name, error, expected_error);
- if (error == 0)
+ */
+ if (basemode == DS_MODE_OWNER) {
+ error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
+ &os2);
+ if (error)
+ fatal(0, "dmu_objset_open('%s') = %d", name, error);
+ else
dmu_objset_close(os2);
}
+ error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
+ expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
+ if (error != expected_error)
+ fatal(0, "dmu_objset_open('%s') = %d, expected %d",
+ name, error, expected_error);
+ if (error == 0)
+ dmu_objset_close(os2);
zil_close(zilog);
dmu_objset_close(os);
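The hold semantics exercised above are: when the dataset is owned (basemode DS_MODE_OWNER), an additional DS_MODE_USER hold is expected to succeed, while a second DS_MODE_OWNER open is expected to fail with EBUSY; when it is only held with DS_MODE_USER, taking ownership is expected to succeed, which is exactly the expected_error computed from basemode.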
@@ -1417,7 +1529,7 @@ ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
void
ztest_traverse(ztest_args_t *za)
{
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
traverse_handle_t *th = za->za_th;
int rc, advance;
uint64_t cbstart, cblimit;
@@ -1489,7 +1601,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
dmu_tx_t *tx;
uint64_t batchobj, object, batchsize, endoff, temp;
int b, c, error, bonuslen;
- dmu_object_info_t doi;
+ dmu_object_info_t *doi = &za->za_doi;
char osname[MAXNAMELEN];
dmu_objset_name(os, osname);
@@ -1500,7 +1612,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
/*
* Create a batch object if necessary, and record it in the directory.
*/
- VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
+ VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
sizeof (uint64_t), &batchobj));
if (batchobj == 0) {
tx = dmu_tx_create(os);
@@ -1525,7 +1637,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* Destroy the previous batch of objects.
*/
for (b = 0; b < batchsize; b++) {
- VERIFY(0 == dmu_read(os, batchobj, b * sizeof (uint64_t),
+ VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
sizeof (uint64_t), &object));
if (object == 0)
continue;
@@ -1534,13 +1646,14 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* We expect the nth byte of the bonus buffer to be n.
*/
VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
+ za->za_dbuf = db;
- dmu_object_info_from_db(db, &doi);
- ASSERT(doi.doi_type == DMU_OT_UINT64_OTHER);
- ASSERT(doi.doi_bonus_type == DMU_OT_PLAIN_OTHER);
- ASSERT3S(doi.doi_physical_blks, >=, 0);
+ dmu_object_info_from_db(db, doi);
+ ASSERT(doi->doi_type == DMU_OT_UINT64_OTHER);
+ ASSERT(doi->doi_bonus_type == DMU_OT_PLAIN_OTHER);
+ ASSERT3S(doi->doi_physical_blks, >=, 0);
- bonuslen = db->db_size;
+ bonuslen = doi->doi_bonus_size;
for (c = 0; c < bonuslen; c++) {
if (((uint8_t *)db->db_data)[c] !=
@@ -1554,6 +1667,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
}
dmu_buf_rele(db, FTAG);
+ za->za_dbuf = NULL;
/*
* We expect the word at endoff to be our object number.
@@ -1658,8 +1772,9 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
/*
* Write to both the bonus buffer and the regular data.
*/
- VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
- ASSERT3U(bonuslen, ==, db->db_size);
+ VERIFY(dmu_bonus_hold(os, object, FTAG, &db) == 0);
+ za->za_dbuf = db;
+ ASSERT3U(bonuslen, <=, db->db_size);
dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
ASSERT3S(va_nblocks, >=, 0);
@@ -1670,10 +1785,11 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
* See comments above regarding the contents of
* the bonus buffer and the word at endoff.
*/
- for (c = 0; c < db->db_size; c++)
+ for (c = 0; c < bonuslen; c++)
((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
dmu_buf_rele(db, FTAG);
+ za->za_dbuf = NULL;
/*
* Write to a large offset to increase indirection.
@@ -1928,226 +2044,240 @@ ztest_dmu_read_write(ztest_args_t *za)
}
void
-ztest_dmu_check_future_leak(objset_t *os, uint64_t txg)
+ztest_dmu_check_future_leak(ztest_args_t *za)
{
+ objset_t *os = za->za_os;
dmu_buf_t *db;
- ztest_block_tag_t rbt;
-
- if (zopt_verbose >= 3) {
- char osname[MAXNAMELEN];
- dmu_objset_name(os, osname);
- (void) printf("checking %s for future leaks in txg %lld...\n",
- osname, (u_longlong_t)txg);
- }
+ ztest_block_tag_t *bt;
+ dmu_object_info_t *doi = &za->za_doi;
/*
* Make sure that, if there is a write record in the bonus buffer
* of the ZTEST_DIROBJ, that the txg for this record is <= the
* last synced txg of the pool.
*/
-
- VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db));
- ASSERT3U(db->db_size, ==, sizeof (rbt));
- bcopy(db->db_data, &rbt, db->db_size);
- if (rbt.bt_objset != 0) {
- ASSERT3U(rbt.bt_objset, ==, dmu_objset_id(os));
- ASSERT3U(rbt.bt_object, ==, ZTEST_DIROBJ);
- ASSERT3U(rbt.bt_offset, ==, -1ULL);
- if (rbt.bt_txg > txg) {
- fatal(0,
- "future leak: got %llx, last synced txg is %llx",
- rbt.bt_txg, txg);
- }
+ VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
+ za->za_dbuf = db;
+ VERIFY(dmu_object_info(os, ZTEST_DIROBJ, doi) == 0);
+ ASSERT3U(doi->doi_bonus_size, >=, sizeof (*bt));
+ ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
+ ASSERT3U(doi->doi_bonus_size % sizeof (*bt), ==, 0);
+ bt = (void *)((char *)db->db_data + doi->doi_bonus_size - sizeof (*bt));
+ if (bt->bt_objset != 0) {
+ ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
+ ASSERT3U(bt->bt_object, ==, ZTEST_DIROBJ);
+ ASSERT3U(bt->bt_offset, ==, -1ULL);
+ ASSERT3U(bt->bt_txg, <, spa_first_txg(za->za_spa));
}
dmu_buf_rele(db, FTAG);
+ za->za_dbuf = NULL;
}
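The write record now lives in the last sizeof (*bt) bytes of the (resizable) bonus buffer, so its address is derived from the current bonus size rather than a fixed offset. A stand-alone sketch of the same pointer arithmetic on a plain byte buffer, with struct sketch_tag standing in for ztest_block_tag_t:

#include <stddef.h>
#include <stdint.h>

struct sketch_tag {
	uint64_t txg;
};

static struct sketch_tag *
sketch_last_tag(void *bonus, size_t bonus_size)
{
	/* the tag occupies the trailing sizeof (struct sketch_tag) bytes */
	return ((struct sketch_tag *)
	    ((char *)bonus + bonus_size - sizeof (struct sketch_tag)));
}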
void
ztest_dmu_write_parallel(ztest_args_t *za)
{
objset_t *os = za->za_os;
- dmu_tx_t *tx;
+ ztest_block_tag_t *rbt = &za->za_rbt;
+ ztest_block_tag_t *wbt = &za->za_wbt;
+ const size_t btsize = sizeof (ztest_block_tag_t);
dmu_buf_t *db;
- int i, b, error, do_free, bs;
- uint64_t off, txg_how, txg;
+ int b, error;
+ int bs = ZTEST_DIROBJ_BLOCKSIZE;
+ int do_free = 0;
+ uint64_t off, txg, txg_how;
mutex_t *lp;
char osname[MAXNAMELEN];
char iobuf[SPA_MAXBLOCKSIZE];
- ztest_block_tag_t rbt, wbt;
+ blkptr_t blk = { 0 };
+ uint64_t blkoff;
+ zbookmark_t zb;
+ dmu_tx_t *tx = dmu_tx_create(os);
dmu_objset_name(os, osname);
- bs = ZTEST_DIROBJ_BLOCKSIZE;
/*
* Have multiple threads write to large offsets in ZTEST_DIROBJ
* to verify that having multiple threads writing to the same object
* in parallel doesn't cause any trouble.
- * Also do parallel writes to the bonus buffer on occasion.
*/
- for (i = 0; i < 50; i++) {
+ if (ztest_random(4) == 0) {
+ /*
+ * Do the bonus buffer instead of a regular block.
+ * We need a lock to serialize resize vs. others,
+ * so we hash on the objset ID.
+ */
+ b = dmu_objset_id(os) % ZTEST_SYNC_LOCKS;
+ off = -1ULL;
+ dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
+ } else {
b = ztest_random(ZTEST_SYNC_LOCKS);
- lp = &ztest_shared->zs_sync_lock[b];
-
- do_free = (ztest_random(4) == 0);
-
- off = za->za_diroff_shared + ((uint64_t)b << SPA_MAXBLOCKSHIFT);
-
+ off = za->za_diroff_shared + (b << SPA_MAXBLOCKSHIFT);
if (ztest_random(4) == 0) {
- /*
- * Do the bonus buffer instead of a regular block.
- */
- do_free = 0;
- off = -1ULL;
- }
-
- tx = dmu_tx_create(os);
-
- if (off == -1ULL)
- dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
- else if (do_free)
+ do_free = 1;
dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs);
- else
+ } else {
dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs);
+ }
+ }
- txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
- error = dmu_tx_assign(tx, txg_how);
- if (error) {
- if (error == ERESTART) {
- ASSERT(txg_how == TXG_NOWAIT);
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- continue;
- }
- dmu_tx_abort(tx);
+ txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
+ error = dmu_tx_assign(tx, txg_how);
+ if (error) {
+ if (error == ERESTART) {
+ ASSERT(txg_how == TXG_NOWAIT);
+ dmu_tx_wait(tx);
+ } else {
ztest_record_enospc("dmu write parallel");
- return;
}
- txg = dmu_tx_get_txg(tx);
+ dmu_tx_abort(tx);
+ return;
+ }
+ txg = dmu_tx_get_txg(tx);
- if (do_free) {
- (void) mutex_lock(lp);
- VERIFY(0 == dmu_free_range(os, ZTEST_DIROBJ, off,
- bs, tx));
- (void) mutex_unlock(lp);
- dmu_tx_commit(tx);
- continue;
+ lp = &ztest_shared->zs_sync_lock[b];
+ (void) mutex_lock(lp);
+
+ wbt->bt_objset = dmu_objset_id(os);
+ wbt->bt_object = ZTEST_DIROBJ;
+ wbt->bt_offset = off;
+ wbt->bt_txg = txg;
+ wbt->bt_thread = za->za_instance;
+ wbt->bt_seq = ztest_shared->zs_seq[b]++; /* protected by lp */
+
+ /*
+ * Occasionally, write an all-zero block to test the behavior
+ * of blocks that compress into holes.
+ */
+ if (off != -1ULL && ztest_random(8) == 0)
+ bzero(wbt, btsize);
+
+ if (off == -1ULL) {
+ dmu_object_info_t *doi = &za->za_doi;
+ char *dboff;
+
+ VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
+ za->za_dbuf = db;
+ dmu_object_info_from_db(db, doi);
+ ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
+ ASSERT3U(doi->doi_bonus_size, >=, btsize);
+ ASSERT3U(doi->doi_bonus_size % btsize, ==, 0);
+ dboff = (char *)db->db_data + doi->doi_bonus_size - btsize;
+ bcopy(dboff, rbt, btsize);
+ if (rbt->bt_objset != 0) {
+ ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
+ ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
+ ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
+ ASSERT3U(rbt->bt_txg, <=, wbt->bt_txg);
}
-
- wbt.bt_objset = dmu_objset_id(os);
- wbt.bt_object = ZTEST_DIROBJ;
- wbt.bt_offset = off;
- wbt.bt_txg = txg;
- wbt.bt_thread = za->za_instance;
-
- if (off == -1ULL) {
- wbt.bt_seq = 0;
- VERIFY(0 == dmu_bonus_hold(os, ZTEST_DIROBJ,
- FTAG, &db));
- ASSERT3U(db->db_size, ==, sizeof (wbt));
- bcopy(db->db_data, &rbt, db->db_size);
- if (rbt.bt_objset != 0) {
- ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
- ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
- ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
- ASSERT3U(rbt.bt_txg, <=, wbt.bt_txg);
- }
- dmu_buf_will_dirty(db, tx);
- bcopy(&wbt, db->db_data, db->db_size);
- dmu_buf_rele(db, FTAG);
- dmu_tx_commit(tx);
- continue;
+ if (ztest_random(10) == 0) {
+ int newsize = (ztest_random(db->db_size /
+ btsize) + 1) * btsize;
+
+ ASSERT3U(newsize, >=, btsize);
+ ASSERT3U(newsize, <=, db->db_size);
+ VERIFY3U(dmu_set_bonus(db, newsize, tx), ==, 0);
+ dboff = (char *)db->db_data + newsize - btsize;
}
+ dmu_buf_will_dirty(db, tx);
+ bcopy(wbt, dboff, btsize);
+ dmu_buf_rele(db, FTAG);
+ za->za_dbuf = NULL;
+ } else if (do_free) {
+ VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0);
+ } else {
+ dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx);
+ }
- (void) mutex_lock(lp);
+ (void) mutex_unlock(lp);
- wbt.bt_seq = ztest_shared->zs_seq[b]++;
+ if (ztest_random(1000) == 0)
+ (void) poll(NULL, 0, 1); /* open dn_notxholds window */
- dmu_write(os, ZTEST_DIROBJ, off, sizeof (wbt), &wbt, tx);
+ dmu_tx_commit(tx);
+
+ if (ztest_random(10000) == 0)
+ txg_wait_synced(dmu_objset_pool(os), txg);
+
+ if (off == -1ULL || do_free)
+ return;
+ if (ztest_random(2) != 0)
+ return;
+
+ /*
+ * dmu_sync() the block we just wrote.
+ */
+ (void) mutex_lock(lp);
+
+ blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
+ error = dmu_buf_hold(os, ZTEST_DIROBJ, blkoff, FTAG, &db);
+ za->za_dbuf = db;
+ if (error) {
+ dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
+ osname, ZTEST_DIROBJ, blkoff, error);
(void) mutex_unlock(lp);
+ return;
+ }
+ blkoff = off - blkoff;
+ error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
+ dmu_buf_rele(db, FTAG);
+ za->za_dbuf = NULL;
- if (ztest_random(100) == 0)
- (void) poll(NULL, 0, 1); /* open dn_notxholds window */
+ (void) mutex_unlock(lp);
- dmu_tx_commit(tx);
+ if (error) {
+ dprintf("dmu_sync(%s, %d, %llx) = %d\n",
+ osname, ZTEST_DIROBJ, off, error);
+ return;
+ }
- if (ztest_random(1000) == 0)
- txg_wait_synced(dmu_objset_pool(os), txg);
-
- if (ztest_random(2) == 0) {
- blkptr_t blk = { 0 };
- uint64_t blkoff;
- zbookmark_t zb;
-
- (void) mutex_lock(lp);
- blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
- error = dmu_buf_hold(os,
- ZTEST_DIROBJ, blkoff, FTAG, &db);
- if (error) {
- dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, blkoff, error);
- (void) mutex_unlock(lp);
- continue;
- }
- blkoff = off - blkoff;
- error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
- dmu_buf_rele(db, FTAG);
- (void) mutex_unlock(lp);
- if (error) {
- dprintf("dmu_sync(%s, %d, %llx) = %d\n",
- osname, ZTEST_DIROBJ, off, error);
- continue;
- }
+ if (blk.blk_birth == 0) /* concurrent free */
+ return;
- if (blk.blk_birth == 0) { /* concurrent free */
- continue;
- }
- txg_suspend(dmu_objset_pool(os));
+ txg_suspend(dmu_objset_pool(os));
- ASSERT(blk.blk_fill == 1);
- ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
- ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
- ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
+ ASSERT(blk.blk_fill == 1);
+ ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
+ ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
+ ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
- /*
- * Read the block that dmu_sync() returned to
- * make sure its contents match what we wrote.
- * We do this while still txg_suspend()ed to ensure
- * that the block can't be reused before we read it.
- */
- zb.zb_objset = dmu_objset_id(os);
- zb.zb_object = ZTEST_DIROBJ;
- zb.zb_level = 0;
- zb.zb_blkid = off / bs;
- error = zio_wait(zio_read(NULL, dmu_objset_spa(os),
- &blk, iobuf, bs, NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
- ASSERT(error == 0);
+ /*
+ * Read the block that dmu_sync() returned to make sure its contents
+ * match what we wrote. We do this while still txg_suspend()ed
+ * to ensure that the block can't be reused before we read it.
+ */
+ zb.zb_objset = dmu_objset_id(os);
+ zb.zb_object = ZTEST_DIROBJ;
+ zb.zb_level = 0;
+ zb.zb_blkid = off / bs;
+ error = zio_wait(zio_read(NULL, za->za_spa, &blk, iobuf, bs,
+ NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
+ ASSERT3U(error, ==, 0);
- txg_resume(dmu_objset_pool(os));
+ txg_resume(dmu_objset_pool(os));
- bcopy(&iobuf[blkoff], &rbt, sizeof (rbt));
+ bcopy(&iobuf[blkoff], rbt, btsize);
- if (rbt.bt_objset == 0) /* concurrent free */
- continue;
+ if (rbt->bt_objset == 0) /* concurrent free */
+ return;
- ASSERT3U(rbt.bt_objset, ==, wbt.bt_objset);
- ASSERT3U(rbt.bt_object, ==, wbt.bt_object);
- ASSERT3U(rbt.bt_offset, ==, wbt.bt_offset);
+ if (wbt->bt_objset == 0) /* all-zero overwrite */
+ return;
- /*
- * The semantic of dmu_sync() is that we always
- * push the most recent version of the data,
- * so in the face of concurrent updates we may
- * see a newer version of the block. That's OK.
- */
- ASSERT3U(rbt.bt_txg, >=, wbt.bt_txg);
- if (rbt.bt_thread == wbt.bt_thread)
- ASSERT3U(rbt.bt_seq, ==, wbt.bt_seq);
- else
- ASSERT3U(rbt.bt_seq, >, wbt.bt_seq);
- }
- }
+ ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
+ ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
+ ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
+
+ /*
+ * The semantic of dmu_sync() is that we always push the most recent
+ * version of the data, so in the face of concurrent updates we may
+ * see a newer version of the block. That's OK.
+ */
+ ASSERT3U(rbt->bt_txg, >=, wbt->bt_txg);
+ if (rbt->bt_thread == wbt->bt_thread)
+ ASSERT3U(rbt->bt_seq, ==, wbt->bt_seq);
+ else
+ ASSERT3U(rbt->bt_seq, >, wbt->bt_seq);
}
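P2ALIGN_TYPED(off, bs, uint64_t) above rounds the write offset down to its containing bs-aligned block (bs is ZTEST_DIROBJ_BLOCKSIZE, a power of two), and the following off - blkoff is then the tag's byte offset within that block for the post-dmu_sync() comparison. A stand-alone sketch of the alignment arithmetic:

#include <stdint.h>

static uint64_t
sketch_p2align(uint64_t off, uint64_t bs)
{
	/* bs must be a power of two; e.g. off = 0x11234, bs = 0x10000 -> 0x10000 */
	return (off & ~(bs - 1));
}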
/*
@@ -2166,7 +2296,6 @@ ztest_zap(ztest_args_t *za)
uint64_t value[ZTEST_ZAP_MAX_INTS];
uint64_t zl_ints, zl_intsize, prop;
int i, ints;
- int iters = 100;
dmu_tx_t *tx;
char propname[100], txgname[100];
int error;
@@ -2230,122 +2359,113 @@ ztest_zap(ztest_args_t *za)
ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
- while (--iters >= 0) {
- prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
- (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
- (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
- bzero(value, sizeof (value));
- last_txg = 0;
-
- /*
- * If these zap entries already exist, validate their contents.
- */
- error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
- if (error == 0) {
- ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
- ASSERT3U(zl_ints, ==, 1);
-
- error = zap_lookup(os, object, txgname, zl_intsize,
- zl_ints, &last_txg);
+ prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
+ (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
+ (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
+ bzero(value, sizeof (value));
+ last_txg = 0;
- ASSERT3U(error, ==, 0);
+ /*
+ * If these zap entries already exist, validate their contents.
+ */
+ error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
+ if (error == 0) {
+ ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+ ASSERT3U(zl_ints, ==, 1);
- error = zap_length(os, object, propname, &zl_intsize,
- &zl_ints);
+ VERIFY(zap_lookup(os, object, txgname, zl_intsize,
+ zl_ints, &last_txg) == 0);
- ASSERT3U(error, ==, 0);
- ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
- ASSERT3U(zl_ints, ==, ints);
+ VERIFY(zap_length(os, object, propname, &zl_intsize,
+ &zl_ints) == 0);
- error = zap_lookup(os, object, propname, zl_intsize,
- zl_ints, value);
+ ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
+ ASSERT3U(zl_ints, ==, ints);
- ASSERT3U(error, ==, 0);
+ VERIFY(zap_lookup(os, object, propname, zl_intsize,
+ zl_ints, value) == 0);
- for (i = 0; i < ints; i++) {
- ASSERT3U(value[i], ==, last_txg + object + i);
- }
- } else {
- ASSERT3U(error, ==, ENOENT);
+ for (i = 0; i < ints; i++) {
+ ASSERT3U(value[i], ==, last_txg + object + i);
}
+ } else {
+ ASSERT3U(error, ==, ENOENT);
+ }
- /*
- * Atomically update two entries in our zap object.
- * The first is named txg_%llu, and contains the txg
- * in which the property was last updated. The second
- * is named prop_%llu, and the nth element of its value
- * should be txg + object + n.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("create zap entry");
- dmu_tx_abort(tx);
- return;
- }
- txg = dmu_tx_get_txg(tx);
+ /*
+ * Atomically update two entries in our zap object.
+ * The first is named txg_%llu, and contains the txg
+ * in which the property was last updated. The second
+ * is named prop_%llu, and the nth element of its value
+ * should be txg + object + n.
+ */
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("create zap entry");
+ dmu_tx_abort(tx);
+ return;
+ }
+ txg = dmu_tx_get_txg(tx);
- if (last_txg > txg)
- fatal(0, "zap future leak: old %llu new %llu",
- last_txg, txg);
+ if (last_txg > txg)
+ fatal(0, "zap future leak: old %llu new %llu", last_txg, txg);
- for (i = 0; i < ints; i++)
- value[i] = txg + object + i;
+ for (i = 0; i < ints; i++)
+ value[i] = txg + object + i;
- error = zap_update(os, object, txgname, sizeof (uint64_t),
- 1, &txg, tx);
- if (error)
- fatal(0, "zap_update('%s', %llu, '%s') = %d",
- osname, object, txgname, error);
+ error = zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx);
+ if (error)
+ fatal(0, "zap_update('%s', %llu, '%s') = %d",
+ osname, object, txgname, error);
- error = zap_update(os, object, propname, sizeof (uint64_t),
- ints, value, tx);
- if (error)
- fatal(0, "zap_update('%s', %llu, '%s') = %d",
- osname, object, propname, error);
+ error = zap_update(os, object, propname, sizeof (uint64_t),
+ ints, value, tx);
+ if (error)
+ fatal(0, "zap_update('%s', %llu, '%s') = %d",
+ osname, object, propname, error);
- dmu_tx_commit(tx);
+ dmu_tx_commit(tx);
- /*
- * Remove a random pair of entries.
- */
- prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
- (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
- (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
+ /*
+ * Remove a random pair of entries.
+ */
+ prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
+ (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
+ (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
- error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
+ error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
- if (error == ENOENT)
- continue;
+ if (error == ENOENT)
+ return;
- ASSERT3U(error, ==, 0);
+ ASSERT3U(error, ==, 0);
- tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("remove zap entry");
- dmu_tx_abort(tx);
- return;
- }
- error = zap_remove(os, object, txgname, tx);
- if (error)
- fatal(0, "zap_remove('%s', %llu, '%s') = %d",
- osname, object, txgname, error);
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("remove zap entry");
+ dmu_tx_abort(tx);
+ return;
+ }
+ error = zap_remove(os, object, txgname, tx);
+ if (error)
+ fatal(0, "zap_remove('%s', %llu, '%s') = %d",
+ osname, object, txgname, error);
- error = zap_remove(os, object, propname, tx);
- if (error)
- fatal(0, "zap_remove('%s', %llu, '%s') = %d",
- osname, object, propname, error);
+ error = zap_remove(os, object, propname, tx);
+ if (error)
+ fatal(0, "zap_remove('%s', %llu, '%s') = %d",
+ osname, object, propname, error);
- dmu_tx_commit(tx);
- }
+ dmu_tx_commit(tx);
/*
* Once in a while, destroy the object.
*/
- if (ztest_random(100) != 0)
+ if (ztest_random(1000) != 0)
return;
tx = dmu_tx_create(os);
@@ -2372,111 +2492,107 @@ ztest_zap_parallel(ztest_args_t *za)
{
objset_t *os = za->za_os;
uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
- int iters = 100;
dmu_tx_t *tx;
int i, namelen, error;
char name[20], string_value[20];
void *data;
- while (--iters >= 0) {
- /*
- * Generate a random name of the form 'xxx.....' where each
- * x is a random printable character and the dots are dots.
- * There are 94 such characters, and the name length goes from
- * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
- */
- namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
+ /*
+ * Generate a random name of the form 'xxx.....' where each
+ * x is a random printable character and the dots are dots.
+ * There are 94 such characters, and the name length goes from
+ * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
+ */
+ namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
- for (i = 0; i < 3; i++)
- name[i] = '!' + ztest_random('~' - '!' + 1);
- for (; i < namelen - 1; i++)
- name[i] = '.';
- name[i] = '\0';
+ for (i = 0; i < 3; i++)
+ name[i] = '!' + ztest_random('~' - '!' + 1);
+ for (; i < namelen - 1; i++)
+ name[i] = '.';
+ name[i] = '\0';
- if (ztest_random(2) == 0)
- object = ZTEST_MICROZAP_OBJ;
- else
- object = ZTEST_FATZAP_OBJ;
+ if (ztest_random(2) == 0)
+ object = ZTEST_MICROZAP_OBJ;
+ else
+ object = ZTEST_FATZAP_OBJ;
- if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) {
- wsize = sizeof (txg);
- wc = 1;
- data = &txg;
- } else {
- wsize = 1;
- wc = namelen;
- data = string_value;
- }
+ if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) {
+ wsize = sizeof (txg);
+ wc = 1;
+ data = &txg;
+ } else {
+ wsize = 1;
+ wc = namelen;
+ data = string_value;
+ }
- count = -1ULL;
- VERIFY(zap_count(os, object, &count) == 0);
- ASSERT(count != -1ULL);
+ count = -1ULL;
+ VERIFY(zap_count(os, object, &count) == 0);
+ ASSERT(count != -1ULL);
- /*
- * Select an operation: length, lookup, add, update, remove.
- */
- i = ztest_random(5);
-
- if (i >= 2) {
- tx = dmu_tx_create(os);
- dmu_tx_hold_zap(tx, object, TRUE, NULL);
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- ztest_record_enospc("zap parallel");
- dmu_tx_abort(tx);
- return;
- }
- txg = dmu_tx_get_txg(tx);
- bcopy(name, string_value, namelen);
- } else {
- tx = NULL;
- txg = 0;
- bzero(string_value, namelen);
- }
+ /*
+ * Select an operation: length, lookup, add, update, remove.
+ */
+ i = ztest_random(5);
- switch (i) {
+ if (i >= 2) {
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_zap(tx, object, TRUE, NULL);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ ztest_record_enospc("zap parallel");
+ dmu_tx_abort(tx);
+ return;
+ }
+ txg = dmu_tx_get_txg(tx);
+ bcopy(name, string_value, namelen);
+ } else {
+ tx = NULL;
+ txg = 0;
+ bzero(string_value, namelen);
+ }
- case 0:
- error = zap_length(os, object, name, &zl_wsize, &zl_wc);
- if (error == 0) {
- ASSERT3U(wsize, ==, zl_wsize);
- ASSERT3U(wc, ==, zl_wc);
- } else {
- ASSERT3U(error, ==, ENOENT);
- }
- break;
+ switch (i) {
- case 1:
- error = zap_lookup(os, object, name, wsize, wc, data);
- if (error == 0) {
- if (data == string_value &&
- bcmp(name, data, namelen) != 0)
- fatal(0, "name '%s' != val '%s' len %d",
- name, data, namelen);
- } else {
- ASSERT3U(error, ==, ENOENT);
- }
- break;
+ case 0:
+ error = zap_length(os, object, name, &zl_wsize, &zl_wc);
+ if (error == 0) {
+ ASSERT3U(wsize, ==, zl_wsize);
+ ASSERT3U(wc, ==, zl_wc);
+ } else {
+ ASSERT3U(error, ==, ENOENT);
+ }
+ break;
- case 2:
- error = zap_add(os, object, name, wsize, wc, data, tx);
- ASSERT(error == 0 || error == EEXIST);
- break;
+ case 1:
+ error = zap_lookup(os, object, name, wsize, wc, data);
+ if (error == 0) {
+ if (data == string_value &&
+ bcmp(name, data, namelen) != 0)
+ fatal(0, "name '%s' != val '%s' len %d",
+ name, data, namelen);
+ } else {
+ ASSERT3U(error, ==, ENOENT);
+ }
+ break;
- case 3:
- VERIFY(zap_update(os, object, name, wsize, wc,
- data, tx) == 0);
- break;
+ case 2:
+ error = zap_add(os, object, name, wsize, wc, data, tx);
+ ASSERT(error == 0 || error == EEXIST);
+ break;
- case 4:
- error = zap_remove(os, object, name, tx);
- ASSERT(error == 0 || error == ENOENT);
- break;
- }
+ case 3:
+ VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0);
+ break;
- if (tx != NULL)
- dmu_tx_commit(tx);
+ case 4:
+ error = zap_remove(os, object, name, tx);
+ ASSERT(error == 0 || error == ENOENT);
+ break;
}
+
+ if (tx != NULL)
+ dmu_tx_commit(tx);
}
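Per the comment above, a name is three random printable characters ('!' through '~', 94 possibilities each) padded with dots to one of 15 lengths, giving 94^3 * 15 = 12,458,760 possible names. A stand-alone sketch of that generator, with rand() standing in for ztest_random():

#include <stdlib.h>

static void
sketch_zap_name(char name[20])
{
	size_t namelen = (size_t)(rand() % 15) + 5 + 1;	/* 6 .. 20 */
	size_t i;

	for (i = 0; i < 3; i++)
		name[i] = '!' + (char)(rand() % ('~' - '!' + 1));
	for (; i < namelen - 1; i++)
		name[i] = '.';
	name[i] = '\0';
}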
void
@@ -2532,21 +2648,6 @@ ztest_dsl_prop_get_set(ztest_args_t *za)
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
-static void
-ztest_error_setup(vdev_t *vd, int mode, int mask, uint64_t arg)
-{
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
- ztest_error_setup(vd->vdev_child[c], mode, mask, arg);
-
- if (vd->vdev_path != NULL) {
- vd->vdev_fault_mode = mode;
- vd->vdev_fault_mask = mask;
- vd->vdev_fault_arg = arg;
- }
-}
-
/*
* Inject random faults into the on-disk data.
*/
@@ -2561,67 +2662,97 @@ ztest_fault_inject(ztest_args_t *za)
char path0[MAXPATHLEN];
char pathrand[MAXPATHLEN];
size_t fsize;
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
int iters = 1000;
- vdev_t *vd0;
+ int maxfaults = zopt_maxfaults;
+ vdev_t *vd0 = NULL;
uint64_t guid0 = 0;
- /*
- * We can't inject faults when we have no fault tolerance.
- */
- if (zopt_maxfaults == 0)
- return;
-
- ASSERT(leaves >= 2);
+ ASSERT(leaves >= 1);
/*
- * Pick a random top-level vdev.
+ * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
*/
- spa_config_enter(spa, RW_READER, FTAG);
- top = ztest_random(spa->spa_root_vdev->vdev_children);
- spa_config_exit(spa, FTAG);
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
- /*
- * Pick a random leaf.
- */
- leaf = ztest_random(leaves);
+ if (ztest_random(2) == 0) {
+ /*
+ * Inject errors on a normal data device.
+ */
+ top = ztest_random(spa->spa_root_vdev->vdev_children);
+ leaf = ztest_random(leaves);
- /*
- * Generate paths to the first two leaves in this top-level vdev,
- * and to the random leaf we selected. We'll induce transient
- * I/O errors and random online/offline activity on leaf 0,
- * and we'll write random garbage to the randomly chosen leaf.
- */
- (void) snprintf(path0, sizeof (path0),
- ztest_dev_template, zopt_dir, zopt_pool, top * leaves + 0);
- (void) snprintf(pathrand, sizeof (pathrand),
- ztest_dev_template, zopt_dir, zopt_pool, top * leaves + leaf);
+ /*
+ * Generate paths to the first leaf in this top-level vdev,
+ * and to the random leaf we selected. We'll induce transient
+ * write failures and random online/offline activity on leaf 0,
+ * and we'll write random garbage to the randomly chosen leaf.
+ */
+ (void) snprintf(path0, sizeof (path0), ztest_dev_template,
+ zopt_dir, zopt_pool, top * leaves + 0);
+ (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
+ zopt_dir, zopt_pool, top * leaves + leaf);
- dprintf("damaging %s and %s\n", path0, pathrand);
+ vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
+ if (vd0 != NULL && maxfaults != 1) {
+ /*
+ * Make vd0 explicitly claim to be unreadable,
+ * or unwriteable, or reach behind its back
+ * and close the underlying fd. We can do this if
+ * maxfaults == 0 because we'll fail and reexecute,
+ * and we can do it if maxfaults >= 2 because we'll
+ * have enough redundancy. If maxfaults == 1, the
+ * combination of this with injection of random data
+ * corruption below exceeds the pool's fault tolerance.
+ */
+ vdev_file_t *vf = vd0->vdev_tsd;
- spa_config_enter(spa, RW_READER, FTAG);
+ if (vf != NULL && ztest_random(3) == 0) {
+ (void) close(vf->vf_vnode->v_fd);
+ vf->vf_vnode->v_fd = -1;
+ } else if (ztest_random(2) == 0) {
+ vd0->vdev_cant_read = B_TRUE;
+ } else {
+ vd0->vdev_cant_write = B_TRUE;
+ }
+ guid0 = vd0->vdev_guid;
+ }
+ } else {
+ /*
+ * Inject errors on an l2cache device.
+ */
+ spa_aux_vdev_t *sav = &spa->spa_l2cache;
- /*
- * If we can tolerate two or more faults, make vd0 fail randomly.
- */
- vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
- if (vd0 != NULL && zopt_maxfaults >= 2) {
+ if (sav->sav_count == 0) {
+ spa_config_exit(spa, SCL_STATE, FTAG);
+ return;
+ }
+ vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
guid0 = vd0->vdev_guid;
- ztest_error_setup(vd0, VDEV_FAULT_COUNT,
- (1U << ZIO_TYPE_READ) | (1U << ZIO_TYPE_WRITE), 100);
+ (void) strcpy(path0, vd0->vdev_path);
+ (void) strcpy(pathrand, vd0->vdev_path);
+
+ leaf = 0;
+ leaves = 1;
+ maxfaults = INT_MAX; /* no limit on cache devices */
}
- spa_config_exit(spa, FTAG);
+ dprintf("damaging %s and %s\n", path0, pathrand);
+
+ spa_config_exit(spa, SCL_STATE, FTAG);
+
+ if (maxfaults == 0)
+ return;
/*
* If we can tolerate two or more faults, randomly online/offline vd0.
*/
- if (zopt_maxfaults >= 2 && guid0 != 0) {
+ if (maxfaults >= 2 && guid0 != 0) {
if (ztest_random(10) < 6)
(void) vdev_offline(spa, guid0, B_TRUE);
else
- (void) vdev_online(spa, guid0);
+ (void) vdev_online(spa, guid0, B_FALSE, NULL);
}
/*
@@ -2660,11 +2791,11 @@ ztest_fault_inject(ztest_args_t *za)
void
ztest_scrub(ztest_args_t *za)
{
- spa_t *spa = dmu_objset_spa(za->za_os);
+ spa_t *spa = za->za_spa;
- (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE);
+ (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
(void) poll(NULL, 0, 1000); /* wait a second, then force a restart */
- (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING, B_FALSE);
+ (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
}
/*
@@ -2706,7 +2837,7 @@ ztest_spa_rename(ztest_args_t *za)
if (error != 0)
fatal(0, "spa_open('%s') = %d", newname, error);
- ASSERT(spa == dmu_objset_spa(za->za_os));
+ ASSERT(spa == za->za_spa);
spa_close(spa, FTAG);
/*
@@ -2724,7 +2855,7 @@ ztest_spa_rename(ztest_args_t *za)
if (error != 0)
fatal(0, "spa_open('%s') = %d", oldname, error);
- ASSERT(spa == dmu_objset_spa(za->za_os));
+ ASSERT(spa == za->za_spa);
spa_close(spa, FTAG);
umem_free(newname, strlen(newname) + 1);
@@ -2778,10 +2909,9 @@ static void
ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
{
char dev_name[MAXPATHLEN];
- nvlist_t *file, *root;
+ nvlist_t *root;
int error;
uint64_t guid;
- uint64_t ashift = ztest_get_ashift();
vdev_t *vd;
(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
@@ -2789,22 +2919,14 @@ ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
/*
* Build the nvlist describing dev_name.
*/
- VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
- VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, dev_name) == 0);
- VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
-
- VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
- VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
- VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
- &file, 1) == 0);
+ root = make_vdev_root(dev_name, NULL, 0, 0, 0, 0, 0, 1);
- spa_config_enter(spa, RW_READER, FTAG);
+ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
guid = 0;
else
guid = vd->vdev_guid;
- spa_config_exit(spa, FTAG);
+ spa_config_exit(spa, SCL_VDEV, FTAG);
error = spa_vdev_attach(spa, guid, root, B_TRUE);
if (error != 0 &&
error != EBUSY &&
@@ -2813,7 +2935,6 @@ ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
error != EDOM)
fatal(0, "spa_vdev_attach(in-place) = %d", error);
- nvlist_free(file);
nvlist_free(root);
}
@@ -2824,6 +2945,9 @@ ztest_verify_blocks(char *pool)
char zdb[MAXPATHLEN + MAXNAMELEN + 20];
char zbuf[1024];
char *bin;
+ char *ztest;
+ char *isa;
+ int isalen;
FILE *fp;
if (realpath(progname, zdb) == NULL)
@@ -2831,13 +2955,19 @@ ztest_verify_blocks(char *pool)
/* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
bin = strstr(zdb, "/usr/bin/");
- if (bin == NULL)
- bin = zdb;
+ ztest = strstr(bin, "/ztest");
+ isa = bin + 8;
+ isalen = ztest - isa;
+ isa = strdup(isa);
/* LINTED */
- (void) sprintf(bin, "/usr/sbin/zdb -bc%s%s -U -O %s %s",
+ (void) sprintf(bin,
+ "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s",
+ isalen,
+ isa,
zopt_verbose >= 3 ? "s" : "",
zopt_verbose >= 4 ? "v" : "",
ztest_random(2) == 0 ? "pre" : "post", pool);
+ free(isa);
if (zopt_verbose >= 5)
(void) printf("Executing %s\n", strstr(zdb, "zdb "));
@@ -2909,7 +3039,7 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Export it.
*/
- error = spa_export(oldname, &config);
+ error = spa_export(oldname, &config, B_FALSE);
if (error)
fatal(0, "spa_export('%s') = %d", oldname, error);
@@ -2958,35 +3088,41 @@ ztest_spa_import_export(char *oldname, char *newname)
}
static void *
+ztest_resume(void *arg)
+{
+ spa_t *spa = arg;
+
+ while (!ztest_exiting) {
+ (void) poll(NULL, 0, 1000);
+
+ if (!spa_suspended(spa))
+ continue;
+
+ spa_vdev_state_enter(spa);
+ vdev_clear(spa, NULL);
+ (void) spa_vdev_state_exit(spa, NULL, 0);
+
+ zio_resume(spa);
+ }
+ return (NULL);
+}
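The resume thread polls once a second and, whenever the pool has suspended I/O, clears the vdev error state and calls zio_resume(). As the later hunks in ztest_run() show, it is started with thr_create(..., THR_BOUND, &resume_tid) right after the pool is opened and is stopped by setting ztest_exiting before thr_join().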
+
+static void *
ztest_thread(void *arg)
{
ztest_args_t *za = arg;
ztest_shared_t *zs = ztest_shared;
hrtime_t now, functime;
ztest_info_t *zi;
- int f;
+ int f, i;
while ((now = gethrtime()) < za->za_stop) {
/*
* See if it's time to force a crash.
*/
if (now > za->za_kill) {
- dmu_tx_t *tx;
- uint64_t txg;
-
- mutex_enter(&spa_namespace_lock);
- tx = dmu_tx_create(za->za_os);
- VERIFY(0 == dmu_tx_assign(tx, TXG_NOWAIT));
- txg = dmu_tx_get_txg(tx);
- dmu_tx_commit(tx);
- zs->zs_txg = txg;
- if (zopt_verbose >= 3)
- (void) printf(
- "killing process after txg %lld\n",
- (u_longlong_t)txg);
- txg_wait_synced(dmu_objset_pool(za->za_os), txg);
- zs->zs_alloc = spa_get_alloc(dmu_objset_spa(za->za_os));
- zs->zs_space = spa_get_space(dmu_objset_spa(za->za_os));
+ zs->zs_alloc = spa_get_alloc(za->za_spa);
+ zs->zs_space = spa_get_space(za->za_spa);
(void) kill(getpid(), SIGKILL);
}
@@ -3011,9 +3147,8 @@ ztest_thread(void *arg)
ZTEST_DIRSIZE;
za->za_diroff_shared = (1ULL << 63);
- ztest_dmu_write_parallel(za);
-
- zi->zi_func(za);
+ for (i = 0; i < zi->zi_iters; i++)
+ zi->zi_func(za);
functime = gethrtime() - now;
@@ -3047,6 +3182,9 @@ ztest_run(char *pool)
ztest_args_t *za;
spa_t *spa;
char name[100];
+ thread_t resume_tid;
+
+ ztest_exiting = B_FALSE;
(void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL);
(void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL);
@@ -3071,9 +3209,7 @@ ztest_run(char *pool)
* Kick off a replacement of the disk we just obliterated.
*/
kernel_init(FREAD | FWRITE);
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open(%s) = %d", pool, error);
+ VERIFY(spa_open(pool, &spa, FTAG) == 0);
ztest_replace_one_disk(spa, 0);
if (zopt_verbose >= 5)
show_pool_stats(spa);
@@ -3106,9 +3242,13 @@ ztest_run(char *pool)
/*
* Open our pool.
*/
- error = spa_open(pool, &spa, FTAG);
- if (error)
- fatal(0, "spa_open() = %d", error);
+ VERIFY(spa_open(pool, &spa, FTAG) == 0);
+
+ /*
+ * Create a thread to periodically resume suspended I/O.
+ */
+ VERIFY(thr_create(0, 0, ztest_resume, spa, THR_BOUND,
+ &resume_tid) == 0);
/*
* Verify that we can safely inquire about any object,
@@ -3144,71 +3284,62 @@ ztest_run(char *pool)
for (t = 0; t < zopt_threads; t++) {
d = t % zopt_datasets;
+
+ (void) strcpy(za[t].za_pool, pool);
+ za[t].za_os = za[d].za_os;
+ za[t].za_spa = spa;
+ za[t].za_zilog = za[d].za_zilog;
+ za[t].za_instance = t;
+ za[t].za_random = ztest_random(-1ULL);
+ za[t].za_start = za[0].za_start;
+ za[t].za_stop = za[0].za_stop;
+ za[t].za_kill = za[0].za_kill;
+
if (t < zopt_datasets) {
ztest_replay_t zr;
int test_future = FALSE;
(void) rw_rdlock(&ztest_shared->zs_name_lock);
(void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
- error = dmu_objset_create(name, DMU_OST_OTHER, NULL,
+ error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
ztest_create_cb, NULL);
if (error == EEXIST) {
test_future = TRUE;
+ } else if (error == ENOSPC) {
+ zs->zs_enospc_count++;
+ (void) rw_unlock(&ztest_shared->zs_name_lock);
+ break;
} else if (error != 0) {
- if (error == ENOSPC) {
- zs->zs_enospc_count++;
- (void) rw_unlock(
- &ztest_shared->zs_name_lock);
- break;
- }
fatal(0, "dmu_objset_create(%s) = %d",
name, error);
}
error = dmu_objset_open(name, DMU_OST_OTHER,
- DS_MODE_STANDARD, &za[d].za_os);
+ DS_MODE_USER, &za[d].za_os);
if (error)
fatal(0, "dmu_objset_open('%s') = %d",
name, error);
(void) rw_unlock(&ztest_shared->zs_name_lock);
- if (test_future && ztest_shared->zs_txg > 0)
- ztest_dmu_check_future_leak(za[d].za_os,
- ztest_shared->zs_txg);
+ if (test_future)
+ ztest_dmu_check_future_leak(&za[t]);
zr.zr_os = za[d].za_os;
zil_replay(zr.zr_os, &zr, &zr.zr_assign,
- ztest_replay_vector);
+ ztest_replay_vector, NULL);
za[d].za_zilog = zil_open(za[d].za_os, NULL);
}
- za[t].za_pool = spa_strdup(pool);
- za[t].za_os = za[d].za_os;
- za[t].za_zilog = za[d].za_zilog;
- za[t].za_instance = t;
- za[t].za_random = ztest_random(-1ULL);
- za[t].za_start = za[0].za_start;
- za[t].za_stop = za[0].za_stop;
- za[t].za_kill = za[0].za_kill;
- error = thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
- &za[t].za_thread);
- if (error)
- fatal(0, "can't create thread %d: error %d",
- t, error);
+ VERIFY(thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
+ &za[t].za_thread) == 0);
}
- ztest_shared->zs_txg = 0;
while (--t >= 0) {
- error = thr_join(za[t].za_thread, NULL, NULL);
- if (error)
- fatal(0, "thr_join(%d) = %d", t, error);
+ VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
if (za[t].za_th)
traverse_fini(za[t].za_th);
if (t < zopt_datasets) {
zil_close(za[t].za_zilog);
dmu_objset_close(za[t].za_os);
}
- spa_strfree(za[t].za_pool);
}
- umem_free(za, zopt_threads * sizeof (ztest_args_t));
-
if (zopt_verbose >= 3)
show_pool_stats(spa);
@@ -3218,21 +3349,27 @@ ztest_run(char *pool)
zs->zs_space = spa_get_space(spa);
/*
- * Did we have out-of-space errors? If so, destroy a random objset.
+ * If we had out-of-space errors, destroy a random objset.
*/
if (zs->zs_enospc_count != 0) {
(void) rw_rdlock(&ztest_shared->zs_name_lock);
- (void) snprintf(name, 100, "%s/%s_%d", pool, pool,
- (int)ztest_random(zopt_datasets));
+ d = (int)ztest_random(zopt_datasets);
+ (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
if (zopt_verbose >= 3)
(void) printf("Destroying %s to free up space\n", name);
- (void) dmu_objset_find(name, ztest_destroy_cb, NULL,
+ (void) dmu_objset_find(name, ztest_destroy_cb, &za[d],
DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
txg_wait_synced(spa_get_dsl(spa), 0);
+ umem_free(za, zopt_threads * sizeof (ztest_args_t));
+
+ /* Kill the resume thread */
+ ztest_exiting = B_TRUE;
+ VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
+
/*
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
@@ -3288,8 +3425,9 @@ ztest_init(char *pool)
*/
(void) spa_destroy(pool);
ztest_shared->zs_vdev_primaries = 0;
- nvroot = make_vdev_root(zopt_vdev_size, zopt_raidz, zopt_mirrors, 1);
- error = spa_create(pool, nvroot, NULL);
+ nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
+ 0, zopt_raidz, zopt_mirrors, 1);
+ error = spa_create(pool, nvroot, NULL, NULL, NULL);
nvlist_free(nvroot);
if (error)
@@ -3320,7 +3458,7 @@ main(int argc, char **argv)
(void) setvbuf(stdout, NULL, _IOLBF, 0);
/* Override location of zpool.cache */
- spa_config_dir = "/tmp";
+ spa_config_path = "/tmp/zpool.cache";
ztest_random_fd = open("/dev/urandom", O_RDONLY);