From 935fd1194aa4a1bf2f3e80315a915faa331dc729 Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 25 Feb 2013 12:33:31 +0000 Subject: MFV v242732: Merge the ZFS I/O deadman thread from vendor (illumos). This feature panics the system on hanging ZFS I/O, helps debugging and resumes failed service. The panic behavior can be controlled with the loader-only tunables: vfs.zfs.deadman_enabled (enable or disable panic on stalled ZFS I/O) vfs.zfs.deadman_synctime (expiration time for stalled ZFS I/O) By default, ZFS I/O deadman is enabled by default on amd64 and i386 excluding virtual guest machines. Illumos ZFS issues: 3246 ZFS I/O deadman thread References: https://www.illumos.org/issues/3246 MFC after: 2 weeks --- .../contrib/opensolaris/uts/common/fs/zfs/vdev.c | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c') diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 2d92975..17cb6c1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -3173,3 +3173,41 @@ vdev_split(vdev_t *vd) } vdev_propagate_state(cvd); } + +void +vdev_deadman(vdev_t *vd) +{ + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + + vdev_deadman(cvd); + } + + if (vd->vdev_ops->vdev_op_leaf) { + vdev_queue_t *vq = &vd->vdev_queue; + + mutex_enter(&vq->vq_lock); + if (avl_numnodes(&vq->vq_pending_tree) > 0) { + spa_t *spa = vd->vdev_spa; + zio_t *fio; + uint64_t delta; + + /* + * Look at the head of all the pending queues, + * if any I/O has been outstanding for longer than + * the spa_deadman_synctime we panic the system. + */ + fio = avl_first(&vq->vq_pending_tree); + delta = ddi_get_lbolt64() - fio->io_timestamp; + if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) { + zfs_dbgmsg("SLOW IO: zio timestamp %llu, " + "delta %llu, last io %llu", + fio->io_timestamp, delta, + vq->vq_io_complete_ts); + fm_panic("I/O to pool '%s' appears to be " + "hung.", spa_name(spa)); + } + } + mutex_exit(&vq->vq_lock); + } +} -- cgit v1.1