diff options
author | pjd <pjd@FreeBSD.org> | 2008-11-17 20:49:29 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2008-11-17 20:49:29 +0000 |
commit | bbe899b96e388a8b82439f81ed3707e0d9c6070d (patch) | |
tree | 81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c | |
parent | d2f579595c362ce27b4d87e2c40e1c4e09b929e3 (diff) | |
download | FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.zip FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.tar.gz |
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This bring huge amount of changes, I'll enumerate only user-visible changes:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows to use additional disks for cache.
Huge performance improvements mostly for random read of mostly
static content.
- slog
Allow to use additional disks for ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows regular users to perform privileged operations on files stored
on ZFS file systems owned by him. Very careful with this one.
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support to boot off of ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Before if write requested failed, system paniced. Now one
can select from one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just quota and reservation properties, but don't count space consumed
by children file systems, clones and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- External attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c | 232 |
1 files changed, 154 insertions, 78 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c index 9e8bcf3..1ffdb10 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/zfs_context.h> #include <sys/spa.h> #include <sys/spa_impl.h> @@ -43,16 +41,18 @@ /* * Pool configuration repository. * - * The configuration for all pools, in addition to being stored on disk, is - * stored in /etc/zfs/zpool.cache as a packed nvlist. The kernel maintains - * this list as pools are created, destroyed, or modified. + * Pool configuration is stored as a packed nvlist on the filesystem. By + * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot + * (when the ZFS module is loaded). Pools can also have the 'cachefile' + * property set that allows them to be stored in an alternate location until + * the control of external software. * - * We have a single nvlist which holds all the configuration information. When - * the module loads, we read this information from the cache and populate the - * SPA namespace. This namespace is maintained independently in spa.c. - * Whenever the namespace is modified, or the configuration of a pool is - * changed, we call spa_config_sync(), which walks through all the active pools - * and writes the configuration to disk. + * For each cache file, we have a single nvlist which holds all the + * configuration information. When the module loads, we read this information + * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is + * maintained independently in spa.c. Whenever the namespace is modified, or + * the configuration of a pool is changed, we call spa_config_sync(), which + * walks through all the active pools and writes the configuration to disk. */ static uint64_t spa_config_generation = 1; @@ -61,7 +61,7 @@ static uint64_t spa_config_generation = 1; * This can be overridden in userland to preserve an alternate namespace for * userland pools when doing testing. */ -const char *spa_config_dir = ZPOOL_CACHE_DIR; +const char *spa_config_path = ZPOOL_CACHE; /* * Called when the module is first loaded, this routine loads the configuration @@ -75,17 +75,21 @@ spa_config_load(void) nvlist_t *nvlist, *child; nvpair_t *nvpair; spa_t *spa; - char pathname[128]; + char *pathname; struct _buf *file; uint64_t fsize; /* * Open the configuration file. */ - (void) snprintf(pathname, sizeof (pathname), "%s/%s", - spa_config_dir, ZPOOL_CACHE_FILE); + pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); file = kobj_open_file(pathname); + + kmem_free(pathname, MAXPATHLEN); + if (file == (struct _buf *)-1) { ZFS_LOG(1, "Cannot open %s.", pathname); return; @@ -148,47 +152,32 @@ out: kobj_close_file(file); } -/* - * Synchronize all pools to disk. This must be called with the namespace lock - * held. - */ -void -spa_config_sync(void) +static void +spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { - spa_t *spa = NULL; - nvlist_t *config; size_t buflen; char *buf; vnode_t *vp; int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; - char pathname[128]; - char pathname2[128]; - - ASSERT(MUTEX_HELD(&spa_namespace_lock)); - - VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); + char *temp; /* - * Add all known pools to the configuration list, ignoring those with - * alternate root paths. + * If the nvlist is empty (NULL), then remove the old cachefile. */ - spa = NULL; - while ((spa = spa_next(spa)) != NULL) { - mutex_enter(&spa->spa_config_cache_lock); - if (spa->spa_config && spa->spa_name && spa->spa_root == NULL) - VERIFY(nvlist_add_nvlist(config, spa->spa_name, - spa->spa_config) == 0); - mutex_exit(&spa->spa_config_cache_lock); + if (nvl == NULL) { + (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); + return; } /* * Pack the configuration into a buffer. */ - VERIFY(nvlist_size(config, &buflen, NV_ENCODE_XDR) == 0); + VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); buf = kmem_alloc(buflen, KM_SLEEP); + temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - VERIFY(nvlist_pack(config, &buf, &buflen, NV_ENCODE_XDR, + VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP) == 0); /* @@ -196,29 +185,92 @@ spa_config_sync(void) * 'write to temporary file, sync, move over original' to make sure we * always have a consistent view of the data. */ - (void) snprintf(pathname, sizeof (pathname), "%s/%s", spa_config_dir, - ZPOOL_CACHE_TMP); + (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - if (vn_open(pathname, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) != 0) - goto out; + if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { + if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, NULL) == 0 && + VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { + (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); + } + (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); + VN_RELE(vp); + } + + (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); + + kmem_free(buf, buflen); + kmem_free(temp, MAXPATHLEN); +} + +/* + * Synchronize pool configuration to disk. This must be called with the + * namespace lock held. + */ +void +spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) +{ + spa_config_dirent_t *dp, *tdp; + nvlist_t *nvl; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + /* + * Iterate over all cachefiles for the pool, past or present. When the + * cachefile is changed, the new one is pushed onto this list, allowing + * us to update previous cachefiles that no longer contain this pool. + */ + for (dp = list_head(&target->spa_config_list); dp != NULL; + dp = list_next(&target->spa_config_list, dp)) { + spa_t *spa = NULL; + if (dp->scd_path == NULL) + continue; - if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL) == 0 && - VOP_FSYNC(vp, FSYNC, kcred) == 0) { - (void) snprintf(pathname2, sizeof (pathname2), "%s/%s", - spa_config_dir, ZPOOL_CACHE_FILE); - (void) vn_rename(pathname, pathname2, UIO_SYSSPACE); + /* + * Iterate over all pools, adding any matching pools to 'nvl'. + */ + nvl = NULL; + while ((spa = spa_next(spa)) != NULL) { + if (spa == target && removing) + continue; + + mutex_enter(&spa->spa_props_lock); + tdp = list_head(&spa->spa_config_list); + if (spa->spa_config == NULL || + tdp->scd_path == NULL || + strcmp(tdp->scd_path, dp->scd_path) != 0) { + mutex_exit(&spa->spa_props_lock); + continue; + } + + if (nvl == NULL) + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, + KM_SLEEP) == 0); + + VERIFY(nvlist_add_nvlist(nvl, spa->spa_name, + spa->spa_config) == 0); + mutex_exit(&spa->spa_props_lock); + } + + spa_config_write(dp, nvl); + nvlist_free(nvl); } - (void) VOP_CLOSE(vp, oflags, 1, 0, kcred); - VN_RELE(vp); + /* + * Remove any config entries older than the current one. + */ + dp = list_head(&target->spa_config_list); + while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) { + list_remove(&target->spa_config_list, tdp); + if (tdp->scd_path != NULL) + spa_strfree(tdp->scd_path); + kmem_free(tdp, sizeof (spa_config_dirent_t)); + } -out: - (void) vn_remove(pathname, UIO_SYSSPACE, RMFILE); spa_config_generation++; - kmem_free(buf, buflen); - nvlist_free(config); + if (postsysevent) + spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC); } /* @@ -231,27 +283,25 @@ nvlist_t * spa_all_configs(uint64_t *generation) { nvlist_t *pools; - spa_t *spa; + spa_t *spa = NULL; if (*generation == spa_config_generation) return (NULL); VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); - spa = NULL; mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { - if (INGLOBALZONE(curproc) || + if (INGLOBALZONE(curthread) || zone_dataset_visible(spa_name(spa), NULL)) { - mutex_enter(&spa->spa_config_cache_lock); + mutex_enter(&spa->spa_props_lock); VERIFY(nvlist_add_nvlist(pools, spa_name(spa), spa->spa_config) == 0); - mutex_exit(&spa->spa_config_cache_lock); + mutex_exit(&spa->spa_props_lock); } } - mutex_exit(&spa_namespace_lock); - *generation = spa_config_generation; + mutex_exit(&spa_namespace_lock); return (pools); } @@ -259,11 +309,11 @@ spa_all_configs(uint64_t *generation) void spa_config_set(spa_t *spa, nvlist_t *config) { - mutex_enter(&spa->spa_config_cache_lock); + mutex_enter(&spa->spa_props_lock); if (spa->spa_config != NULL) nvlist_free(spa->spa_config); spa->spa_config = config; - mutex_exit(&spa->spa_config_cache_lock); + mutex_exit(&spa->spa_props_lock); } /* @@ -277,11 +327,16 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) nvlist_t *config, *nvroot; vdev_t *rvd = spa->spa_root_vdev; unsigned long hostid = 0; + boolean_t locked = B_FALSE; - ASSERT(spa_config_held(spa, RW_READER)); - - if (vd == NULL) + if (vd == NULL) { vd = rvd; + locked = B_TRUE; + spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); + } + + ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == + (SCL_CONFIG | SCL_STATE)); /* * If txg is -1, report the current value of spa->spa_config_txg. @@ -302,8 +357,10 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, - hostid) == 0); + if (hostid != 0) { + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, + hostid) == 0); + } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname.nodename) == 0); @@ -315,30 +372,48 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 1ULL) == 0); + if (vd->vdev_islog) + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, + 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } - nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE); + nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); + if (locked) + spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); + return (config); } /* - * Update all disk labels, generate a fresh config based on the current - * in-core state, and sync the global config cache. + * For a pool that's not currently a booting rootpool, update all disk labels, + * generate a fresh config based on the current in-core state, and sync the + * global config cache. */ void spa_config_update(spa_t *spa, int what) { + spa_config_update_common(spa, what, FALSE); +} + +/* + * Update all disk labels, generate a fresh config based on the current + * in-core state, and sync the global config cache (do not sync the config + * cache if this is a booting rootpool). + */ +void +spa_config_update_common(spa_t *spa, int what, boolean_t isroot) +{ vdev_t *rvd = spa->spa_root_vdev; uint64_t txg; int c; ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa_config_enter(spa, RW_WRITER, FTAG); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); txg = spa_last_synced_txg(spa) + 1; if (what == SPA_CONFIG_UPDATE_POOL) { vdev_config_dirty(rvd); @@ -358,7 +433,7 @@ spa_config_update(spa_t *spa, int what) } } } - spa_config_exit(spa, FTAG); + spa_config_exit(spa, SCL_ALL, FTAG); /* * Wait for the mosconfig to be regenerated and synced. @@ -368,8 +443,9 @@ spa_config_update(spa_t *spa, int what) /* * Update the global config cache to reflect the new mosconfig. */ - spa_config_sync(); + if (!isroot) + spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); if (what == SPA_CONFIG_UPDATE_POOL) - spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); + spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot); } |