summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
diff options
context:
space:
mode:
authormm <mm@FreeBSD.org>2013-02-25 12:33:31 +0000
committermm <mm@FreeBSD.org>2013-02-25 12:33:31 +0000
commit935fd1194aa4a1bf2f3e80315a915faa331dc729 (patch)
treeec42bfc38bb1575d8989dd9b950b2a8672bdf3cd /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
parent75d62de01add614d86b5cf30bd3b8157f26d2812 (diff)
downloadFreeBSD-src-935fd1194aa4a1bf2f3e80315a915faa331dc729.zip
FreeBSD-src-935fd1194aa4a1bf2f3e80315a915faa331dc729.tar.gz
MFV v242732:
Merge the ZFS I/O deadman thread from vendor (illumos). This feature panics the system on hanging ZFS I/O, helps debugging and resumes failed service. The panic behavior can be controlled with the loader-only tunables: vfs.zfs.deadman_enabled (enable or disable panic on stalled ZFS I/O) vfs.zfs.deadman_synctime (expiration time for stalled ZFS I/O) By default, ZFS I/O deadman is enabled by default on amd64 and i386 excluding virtual guest machines. Illumos ZFS issues: 3246 ZFS I/O deadman thread References: https://www.illumos.org/issues/3246 MFC after: 2 weeks
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c122
1 files changed, 121 insertions, 1 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 606fd18..dc64682 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -22,10 +22,12 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
+#include <sys/spa_boot.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
@@ -253,6 +255,52 @@ TUNABLE_INT("vfs.zfs.recover", &zfs_recover);
SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0,
"Try to recover from otherwise-fatal errors.");
+extern int zfs_txg_synctime_ms;
+
+/*
+ * Expiration time in units of zfs_txg_synctime_ms. This value has two
+ * meanings. First it is used to determine when the spa_deadman logic
+ * should fire. By default the spa_deadman will fire if spa_sync has
+ * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
+ * Secondly, the value determines if an I/O is considered "hung".
+ * Any I/O that has not completed in zfs_deadman_synctime is considered
+ * "hung" resulting in a system panic.
+ * 1000 zfs_txg_synctime_ms (i.e. 1000 seconds).
+ */
+uint64_t zfs_deadman_synctime = 1000ULL;
+TUNABLE_QUAD("vfs.zfs.deadman_synctime", &zfs_deadman_synctime);
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime, CTLFLAG_RDTUN,
+ &zfs_deadman_synctime, 0,
+ "Stalled ZFS I/O expiration time in units of vfs.zfs.txg_synctime_ms");
+
+/*
+ * Default value of -1 for zfs_deadman_enabled is resolved in
+ * zfs_deadman_init()
+ */
+int zfs_deadman_enabled = -1;
+TUNABLE_INT("vfs.zfs.deadman_enabled", &zfs_deadman_enabled);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN,
+ &zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O");
+
+#ifndef illumos
+#ifdef _KERNEL
+static void
+zfs_deadman_init()
+{
+ /*
+ * If we are not i386 or amd64 or in a virtual machine,
+ * disable ZFS deadman thread by default
+ */
+ if (zfs_deadman_enabled == -1) {
+#if defined(__amd64__) || defined(__i386__)
+ zfs_deadman_enabled = (vm_guest == VM_GUEST_NO) ? 1 : 0;
+#else
+ zfs_deadman_enabled = 0;
+#endif
+ }
+}
+#endif /* _KERNEL */
+#endif /* !illumos */
/*
* ==========================================================================
@@ -422,6 +470,23 @@ spa_lookup(const char *name)
}
/*
+ * Fires when spa_sync has not completed within zfs_deadman_synctime_ms.
+ * If the zfs_deadman_enabled flag is set then it inspects all vdev queues
+ * looking for potentially hung I/Os.
+ */
+void
+spa_deadman(void *arg)
+{
+ spa_t *spa = arg;
+
+ zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
+ (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
+ ++spa->spa_deadman_calls);
+ if (zfs_deadman_enabled)
+ vdev_deadman(spa->spa_root_vdev);
+}
+
+/*
* Create an uninitialized spa_t with the given name. Requires
* spa_namespace_lock. The caller must ensure that the spa_t doesn't already
* exist by calling spa_lookup() first.
@@ -431,6 +496,10 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
{
spa_t *spa;
spa_config_dirent_t *dp;
+#ifdef illumos
+ cyc_handler_t hdlr;
+ cyc_time_t when;
+#endif
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -462,6 +531,32 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_proc = &p0;
spa->spa_proc_state = SPA_PROC_NONE;
+#ifdef illumos
+ hdlr.cyh_func = spa_deadman;
+ hdlr.cyh_arg = spa;
+ hdlr.cyh_level = CY_LOW_LEVEL;
+#endif
+
+ spa->spa_deadman_synctime = zfs_deadman_synctime *
+ zfs_txg_synctime_ms * MICROSEC;
+
+#ifdef illumos
+ /*
+ * This determines how often we need to check for hung I/Os after
+ * the cyclic has already fired. Since checking for hung I/Os is
+ * an expensive operation we don't want to check too frequently.
+ * Instead wait for 5 synctimes before checking again.
+ */
+ when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC;
+ when.cyt_when = CY_INFINITY;
+ mutex_enter(&cpu_lock);
+ spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
+ mutex_exit(&cpu_lock);
+#else /* !illumos */
+#ifdef _KERNEL
+ callout_init(&spa->spa_deadman_cycid, CALLOUT_MPSAFE);
+#endif
+#endif
refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
@@ -544,6 +639,18 @@ spa_remove(spa_t *spa)
nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
+#ifdef illumos
+ mutex_enter(&cpu_lock);
+ if (spa->spa_deadman_cycid != CYCLIC_NONE)
+ cyclic_remove(spa->spa_deadman_cycid);
+ mutex_exit(&cpu_lock);
+ spa->spa_deadman_cycid = CYCLIC_NONE;
+#else /* !illumos */
+#ifdef _KERNEL
+ callout_drain(&spa->spa_deadman_cycid);
+#endif
+#endif
+
refcount_destroy(&spa->spa_refcount);
spa_config_lock_destroy(spa);
@@ -1511,6 +1618,12 @@ spa_prev_software_version(spa_t *spa)
}
uint64_t
+spa_deadman_synctime(spa_t *spa)
+{
+ return (spa->spa_deadman_synctime);
+}
+
+uint64_t
dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
{
uint64_t asize = DVA_GET_ASIZE(dva);
@@ -1605,7 +1718,9 @@ spa_init(int mode)
spa_mode_global = mode;
#ifdef illumos
-#ifndef _KERNEL
+#ifdef _KERNEL
+ spa_arch_init();
+#else
if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
arc_procfd = open("/proc/self/ctl", O_WRONLY);
if (arc_procfd == -1) {
@@ -1629,6 +1744,11 @@ spa_init(int mode)
zpool_feature_init();
spa_config_load();
l2arc_start();
+#ifndef illumos
+#ifdef _KERNEL
+ zfs_deadman_init();
+#endif
+#endif /* !illumos */
}
void
OpenPOWER on IntegriCloud