diff options
author | avg <avg@FreeBSD.org> | 2014-01-16 15:57:39 +0000 |
---|---|---|
committer | avg <avg@FreeBSD.org> | 2014-01-16 15:57:39 +0000 |
commit | 26096ba436a54f225f8e56bb965a0e2214256c71 (patch) | |
tree | a24d61eb5c3c255647d4c4bb29075b63b1da94fc /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c | |
parent | 81894fb568ed816e9e9f3a43f778e69751857339 (diff) | |
download | FreeBSD-src-26096ba436a54f225f8e56bb965a0e2214256c71.zip FreeBSD-src-26096ba436a54f225f8e56bb965a0e2214256c71.tar.gz |
MFC r258632,258704: MFV r255255: 4045 zfs write throttle & i/o scheduler
performance work
Sponsored by: HybridCluster [merge]
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c | 61 |
1 file changed, 35 insertions, 26 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c index 718371d..eec4d3a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c @@ -259,23 +259,29 @@ TUNABLE_INT("vfs.zfs.recover", &zfs_recover); SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0, "Try to recover from otherwise-fatal errors."); -extern int zfs_txg_synctime_ms; +/* + * Expiration time in milliseconds. This value has two meanings. First it is + * used to determine when the spa_deadman() logic should fire. By default the + * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds. + * Secondly, the value determines if an I/O is considered "hung". Any I/O that + * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting + * in a system panic. + */ +uint64_t zfs_deadman_synctime_ms = 1000000ULL; +TUNABLE_QUAD("vfs.zfs.deadman_synctime_ms", &zfs_deadman_synctime_ms); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime_ms, CTLFLAG_RDTUN, + &zfs_deadman_synctime_ms, 0, + "Stalled ZFS I/O expiration time in milliseconds"); /* - * Expiration time in units of zfs_txg_synctime_ms. This value has two - * meanings. First it is used to determine when the spa_deadman logic - * should fire. By default the spa_deadman will fire if spa_sync has - * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds). - * Secondly, the value determines if an I/O is considered "hung". - * Any I/O that has not completed in zfs_deadman_synctime is considered - * "hung" resulting in a system panic. - * 1000 zfs_txg_synctime_ms (i.e. 1000 seconds). + * Check time in milliseconds. This defines the frequency at which we check + * for hung I/O. 
*/ -uint64_t zfs_deadman_synctime = 1000ULL; -TUNABLE_QUAD("vfs.zfs.deadman_synctime", &zfs_deadman_synctime); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime, CTLFLAG_RDTUN, - &zfs_deadman_synctime, 0, - "Stalled ZFS I/O expiration time in units of vfs.zfs.txg.synctime_ms"); +uint64_t zfs_deadman_checktime_ms = 5000ULL; +TUNABLE_QUAD("vfs.zfs.deadman_checktime_ms", &zfs_deadman_checktime_ms); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_checktime_ms, CTLFLAG_RDTUN, + &zfs_deadman_checktime_ms, 0, + "Period of checks for stalled ZFS I/O in milliseconds"); /* * Default value of -1 for zfs_deadman_enabled is resolved in @@ -286,6 +292,17 @@ TUNABLE_INT("vfs.zfs.deadman_enabled", &zfs_deadman_enabled); SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN, &zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O"); +/* + * The worst case is single-sector max-parity RAID-Z blocks, in which + * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) + * times the size; so just assume that. Add to this the fact that + * we can have up to 3 DVAs per bp, and one more factor of 2 because + * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together, + * the worst case is: + * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24 + */ +int spa_asize_inflation = 24; + #ifndef illumos #ifdef _KERNEL static void @@ -538,17 +555,16 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) hdlr.cyh_level = CY_LOW_LEVEL; #endif - spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime * - zfs_txg_synctime_ms); + spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); #ifdef illumos /* * This determines how often we need to check for hung I/Os after * the cyclic has already fired. Since checking for hung I/Os is * an expensive operation we don't want to check too frequently. - * Instead wait for 5 synctimes before checking again. + * Instead wait for 5 seconds before checking again. 
*/ - when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms); + when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms); when.cyt_when = CY_INFINITY; mutex_enter(&cpu_lock); spa->spa_deadman_cycid = cyclic_add(&hdlr, &when); @@ -1538,14 +1554,7 @@ spa_freeze_txg(spa_t *spa) uint64_t spa_get_asize(spa_t *spa, uint64_t lsize) { - /* - * The worst case is single-sector max-parity RAID-Z blocks, in which - * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) - * times the size; so just assume that. Add to this the fact that - * we can have up to 3 DVAs per bp, and one more factor of 2 because - * the block may be dittoed with up to 3 DVAs by ddt_sync(). - */ - return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2); + return (lsize * spa_asize_inflation); } uint64_t |