diff options
author | mm <mm@FreeBSD.org> | 2010-09-27 09:42:31 +0000 |
---|---|---|
committer | mm <mm@FreeBSD.org> | 2010-09-27 09:42:31 +0000 |
commit | 240333eaa7cd7f7a595350c52872044734528869 (patch) | |
tree | cd91e345f4a206c0d89bd52cbf33237f67e560a6 /sys/cddl/contrib | |
parent | 06d7ad088338844d37a5a4328b1c4bb5de1a68a2 (diff) | |
download | FreeBSD-src-240333eaa7cd7f7a595350c52872044734528869.zip FreeBSD-src-240333eaa7cd7f7a595350c52872044734528869.tar.gz |
Properly handle IO with B_FAILFAST
Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted
OpenSolaris revision and Bug IDs:
9725:0bf7402e8022
6843014 ZFS B_FAILFAST handling is broken
Approved by: delphij (mentor)
Obtained from: OpenSolaris (Bug ID 6843014)
MFC after: 3 weeks
Diffstat (limited to 'sys/cddl/contrib')
10 files changed, 103 insertions, 44 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 32caec6..c04102e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg) if (svdcount == SPA_DVAS_PER_BP) break; } - error = vdev_config_sync(svd, svdcount, txg); + error = vdev_config_sync(svd, svdcount, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(svd, svdcount, txg, + B_TRUE); } else { error = vdev_config_sync(rvd->vdev_child, - rvd->vdev_children, txg); + rvd->vdev_children, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(rvd->vdev_child, + rvd->vdev_children, txg, B_TRUE); } spa_config_exit(spa, SCL_STATE, FTAG); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h index b8313a9..9332554 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zio); extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); -extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); +extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, + boolean_t); extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h index 15a4a76..bf107d6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -118,7 +118,7 @@ typedef struct zinject_record { uint32_t zi_error; uint64_t zi_type; uint32_t zi_freq; - uint32_t zi_pad; /* pad out to 64 bit alignment */ + uint32_t zi_failfast; } zinject_record_t; #define ZINJECT_NULL 0x1 diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h index 109b64e..4a4d286 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h @@ -117,31 +117,33 @@ enum zio_compress { #define ZIO_PRIORITY_SCRUB (zio_priority_table[10]) #define ZIO_PRIORITY_TABLE_SIZE 11 -#define ZIO_FLAG_MUSTSUCCEED 0x00000 -#define ZIO_FLAG_CANFAIL 0x00001 -#define ZIO_FLAG_SPECULATIVE 0x00002 -#define ZIO_FLAG_CONFIG_WRITER 0x00004 -#define ZIO_FLAG_DONT_RETRY 0x00008 - -#define ZIO_FLAG_DONT_CACHE 0x00010 -#define ZIO_FLAG_DONT_QUEUE 0x00020 -#define ZIO_FLAG_DONT_AGGREGATE 0x00040 -#define ZIO_FLAG_DONT_PROPAGATE 0x00080 - -#define ZIO_FLAG_IO_BYPASS 0x00100 -#define ZIO_FLAG_IO_REPAIR 0x00200 -#define ZIO_FLAG_IO_RETRY 0x00400 -#define ZIO_FLAG_IO_REWRITE 0x00800 - -#define ZIO_FLAG_SELF_HEAL 0x01000 -#define ZIO_FLAG_RESILVER 0x02000 -#define ZIO_FLAG_SCRUB 0x04000 -#define ZIO_FLAG_SCRUB_THREAD 0x08000 - -#define ZIO_FLAG_PROBE 0x10000 -#define ZIO_FLAG_GANG_CHILD 0x20000 -#define ZIO_FLAG_RAW 0x40000 -#define ZIO_FLAG_GODFATHER 0x80000 +#define ZIO_FLAG_MUSTSUCCEED 0x000000 +#define ZIO_FLAG_CANFAIL 0x000001 +#define ZIO_FLAG_SPECULATIVE 0x000002 +#define ZIO_FLAG_CONFIG_WRITER 0x000004 +#define ZIO_FLAG_DONT_RETRY 0x000008 + +#define ZIO_FLAG_DONT_CACHE 0x000010 +#define ZIO_FLAG_DONT_QUEUE 0x000020 +#define ZIO_FLAG_DONT_AGGREGATE 0x000040 +#define ZIO_FLAG_DONT_PROPAGATE 0x000080 + +#define ZIO_FLAG_IO_BYPASS 0x000100 +#define ZIO_FLAG_IO_REPAIR 0x000200 +#define ZIO_FLAG_IO_RETRY 0x000400 +#define ZIO_FLAG_IO_REWRITE 0x000800 + +#define ZIO_FLAG_SELF_HEAL 0x001000 +#define ZIO_FLAG_RESILVER 0x002000 +#define ZIO_FLAG_SCRUB 0x004000 +#define ZIO_FLAG_SCRUB_THREAD 0x008000 + +#define ZIO_FLAG_PROBE 0x010000 +#define ZIO_FLAG_GANG_CHILD 0x020000 +#define ZIO_FLAG_RAW 0x040000 +#define ZIO_FLAG_GODFATHER 0x080000 + +#define ZIO_FLAG_TRYHARD 0x100000 #define ZIO_FLAG_GANG_INHERIT \ (ZIO_FLAG_CANFAIL | \ @@ -159,7 +161,8 @@ enum zio_compress { (ZIO_FLAG_GANG_INHERIT | \ ZIO_FLAG_IO_REPAIR | \ ZIO_FLAG_IO_RETRY | \ - ZIO_FLAG_PROBE) + ZIO_FLAG_PROBE | \ + ZIO_FLAG_TRYHARD) #define ZIO_FLAG_AGG_INHERIT \ (ZIO_FLAG_DONT_AGGREGATE | \ @@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen, struct zinject_record *record); extern int zio_clear_fault(int id); extern int zio_handle_fault_injection(zio_t *zio, int error); -extern int zio_handle_device_injection(vdev_t *vd, int error); +extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); #ifdef __cplusplus diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 82006fc..cb43af3 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio) vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE | - ZIO_FLAG_DONT_RETRY; + ZIO_FLAG_TRYHARD; if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) { /* @@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd) error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); if (zio_injection_enabled && error == 0) - error = zio_handle_device_injection(vd, ENXIO); + error = zio_handle_device_injection(vd, NULL, ENXIO); if (error) { if (vd->vdev_removed && @@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t psize) if (flags & ZIO_FLAG_SPECULATIVE) return; + /* + * If this is an I/O error that is going to be retried, then ignore the + * error. Otherwise, the user may interpret B_FAILFAST I/O errors as + * hard errors, when in reality they can happen for any number of + * innocuous reasons (bus resets, MPxIO link failure, etc). + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + mutex_enter(&vd->vdev_stat_lock); if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { if (zio->io_error == ECKSUM) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c index e6d5743..5db7a6a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c @@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio) bioinit(bp); bp->b_flags = B_BUSY | B_NOCACHE | - (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) | - ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST); + (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); + if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) + bp->b_flags |= B_FAILFAST; bp->b_bcount = zio->io_size; bp->b_un.b_addr = zio->io_data; bp->b_lblkno = lbtodb(zio->io_offset); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c index f1f3bb0..48d5fc2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd) nvlist_t *config = NULL; vdev_phys_t *vp; zio_t *zio; - int flags = - ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd) vp = zio_buf_alloc(sizeof (vdev_phys_t)); +retry: for (int l = 0; l < VDEV_LABELS; l++) { zio = zio_root(spa, NULL, NULL, flags); @@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd) } } + if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + zio_buf_free(vp, sizeof (vdev_phys_t)); return (config); @@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * Write everything in parallel. */ +retry: zio = zio_root(spa, NULL, NULL, flags); for (int l = 0; l < VDEV_LABELS; l++) { @@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) error = zio_wait(zio); + if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + nvlist_free(label); zio_buf_free(pad2, VDEV_PAD_SIZE); zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd)); @@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) { spa_t *spa = vd->vdev_spa; vdev_t *rvd = spa->spa_root_vdev; - int flags = - ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; if (vd == rvd) { ASSERT(zio == NULL); @@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) * at any time, you can just call it again, and it will resume its work. */ int -vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) +vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard) { spa_t *spa = svd[0]->vdev_spa; uberblock_t *ub = &spa->spa_uberblock; @@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) int error; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + /* + * Normally, we don't want to try too hard to write every label and + * uberblock. If there is a flaky disk, we don't want the rest of the + * sync process to block while we retry. But if we can't write a + * single label out, we should retry with ZIO_FLAG_TRYHARD before + * bailing out and declaring the pool faulted. + */ + if (tryhard) + flags |= ZIO_FLAG_TRYHARD; + ASSERT(ub->ub_txg <= txg); /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c index cf2f90d..9d6cefb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c @@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, if (zio->io_flags & ZIO_FLAG_SPECULATIVE) return; + /* + * If this I/O is not a retry I/O, don't post an ereport. + * Otherwise, we risk making bad diagnoses based on B_FAILFAST + * I/Os. + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + if (vd != NULL) { /* * If the vdev has already been marked as failing due diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 8ddf7cd..b477563 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio) vdev_cache_write(zio); if (zio_injection_enabled && zio->io_error == 0) - zio->io_error = zio_handle_device_injection(vd, EIO); + zio->io_error = zio_handle_device_injection(vd, + zio, EIO); if (zio_injection_enabled && zio->io_error == 0) zio->io_error = zio_handle_label_injection(zio, EIO); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c index b3469fd..f8e6880 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, int error) int -zio_handle_device_injection(vdev_t *vd, int error) +zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) { inject_handler_t *handler; int ret = 0; @@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, int error) continue; if (vd->vdev_guid == handler->zi_record.zi_guid) { + if (handler->zi_record.zi_failfast && + (zio == NULL || (zio->io_flags & + (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { + continue; + } + if (handler->zi_record.zi_error == error) { /* * For a failed open, pretend like the device |