summary | refs | log | tree | commit | diff | stats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
diff options
context:
space:
mode:
author: avg <avg@FreeBSD.org> 2014-01-16 15:57:39 +0000
committer: avg <avg@FreeBSD.org> 2014-01-16 15:57:39 +0000
commit: 26096ba436a54f225f8e56bb965a0e2214256c71 (patch)
tree: a24d61eb5c3c255647d4c4bb29075b63b1da94fc /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
parent: 81894fb568ed816e9e9f3a43f778e69751857339 (diff)
download: FreeBSD-src-26096ba436a54f225f8e56bb965a0e2214256c71.zip
download: FreeBSD-src-26096ba436a54f225f8e56bb965a0e2214256c71.tar.gz
MFC r258632,258704: MFV r255255: 4045 zfs write throttle & i/o scheduler performance work
Sponsored by: HybridCluster [merge]
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c')
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c 61
1 files changed, 35 insertions, 26 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 718371d..eec4d3a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -259,23 +259,29 @@ TUNABLE_INT("vfs.zfs.recover", &zfs_recover);
SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0,
"Try to recover from otherwise-fatal errors.");
-extern int zfs_txg_synctime_ms;
+/*
+ * Expiration time in milliseconds. This value has two meanings. First it is
+ * used to determine when the spa_deadman() logic should fire. By default the
+ * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
+ * Secondly, the value determines if an I/O is considered "hung". Any I/O that
+ * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
+ * in a system panic.
+ */
+uint64_t zfs_deadman_synctime_ms = 1000000ULL;
+TUNABLE_QUAD("vfs.zfs.deadman_synctime_ms", &zfs_deadman_synctime_ms);
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime_ms, CTLFLAG_RDTUN,
+ &zfs_deadman_synctime_ms, 0,
+ "Stalled ZFS I/O expiration time in milliseconds");
/*
- * Expiration time in units of zfs_txg_synctime_ms. This value has two
- * meanings. First it is used to determine when the spa_deadman logic
- * should fire. By default the spa_deadman will fire if spa_sync has
- * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
- * Secondly, the value determines if an I/O is considered "hung".
- * Any I/O that has not completed in zfs_deadman_synctime is considered
- * "hung" resulting in a system panic.
- * 1000 zfs_txg_synctime_ms (i.e. 1000 seconds).
+ * Check time in milliseconds. This defines the frequency at which we check
+ * for hung I/O.
*/
-uint64_t zfs_deadman_synctime = 1000ULL;
-TUNABLE_QUAD("vfs.zfs.deadman_synctime", &zfs_deadman_synctime);
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime, CTLFLAG_RDTUN,
- &zfs_deadman_synctime, 0,
- "Stalled ZFS I/O expiration time in units of vfs.zfs.txg.synctime_ms");
+uint64_t zfs_deadman_checktime_ms = 5000ULL;
+TUNABLE_QUAD("vfs.zfs.deadman_checktime_ms", &zfs_deadman_checktime_ms);
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_checktime_ms, CTLFLAG_RDTUN,
+ &zfs_deadman_checktime_ms, 0,
+ "Period of checks for stalled ZFS I/O in milliseconds");
/*
* Default value of -1 for zfs_deadman_enabled is resolved in
@@ -286,6 +292,17 @@ TUNABLE_INT("vfs.zfs.deadman_enabled", &zfs_deadman_enabled);
SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN,
&zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O");
+/*
+ * The worst case is single-sector max-parity RAID-Z blocks, in which
+ * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
+ * times the size; so just assume that. Add to this the fact that
+ * we can have up to 3 DVAs per bp, and one more factor of 2 because
+ * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together,
+ * the worst case is:
+ * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
+ */
+int spa_asize_inflation = 24;
+
#ifndef illumos
#ifdef _KERNEL
static void
@@ -538,17 +555,16 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
hdlr.cyh_level = CY_LOW_LEVEL;
#endif
- spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime *
- zfs_txg_synctime_ms);
+ spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
#ifdef illumos
/*
* This determines how often we need to check for hung I/Os after
* the cyclic has already fired. Since checking for hung I/Os is
* an expensive operation we don't want to check too frequently.
- * Instead wait for 5 synctimes before checking again.
+ * Instead wait for 5 seconds before checking again.
*/
- when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);
+ when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms);
when.cyt_when = CY_INFINITY;
mutex_enter(&cpu_lock);
spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
@@ -1538,14 +1554,7 @@ spa_freeze_txg(spa_t *spa)
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
{
- /*
- * The worst case is single-sector max-parity RAID-Z blocks, in which
- * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
- * times the size; so just assume that. Add to this the fact that
- * we can have up to 3 DVAs per bp, and one more factor of 2 because
- * the block may be dittoed with up to 3 DVAs by ddt_sync().
- */
- return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
+ return (lsize * spa_asize_inflation);
}
uint64_t
OpenPOWER on IntegriCloud