summary refs log tree commit diff stats
diff options
context:
space:
mode:
author mm <mm@FreeBSD.org> 2010-09-27 09:42:31 +0000
committer mm <mm@FreeBSD.org> 2010-09-27 09:42:31 +0000
commit 240333eaa7cd7f7a595350c52872044734528869 (patch)
tree cd91e345f4a206c0d89bd52cbf33237f67e560a6
parent 06d7ad088338844d37a5a4328b1c4bb5de1a68a2 (diff)
download FreeBSD-src-240333eaa7cd7f7a595350c52872044734528869.zip
FreeBSD-src-240333eaa7cd7f7a595350c52872044734528869.tar.gz
Properly handle IO with B_FAILFAST
Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted

OpenSolaris revision and Bug IDs:
9725:0bf7402e8022
6843014 ZFS B_FAILFAST handling is broken

Approved by: delphij (mentor)
Obtained from: OpenSolaris (Bug ID 6843014)
MFC after: 3 weeks
-rw-r--r-- cddl/contrib/opensolaris/cmd/zinject/zinject.c 11
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c 10
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h 5
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h 2
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h 57
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c 14
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c 5
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c 32
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c 9
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c 3
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c 10
11 files changed, 109 insertions, 49 deletions
diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
index cd48b68..e8327e8 100644
--- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c
+++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* ZFS Fault Injector
*
@@ -227,7 +225,7 @@ usage(void)
"\t\tClear the particular record (if given a numeric ID), or\n"
"\t\tall records if 'all' is specificed.\n"
"\n"
- "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
+ "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
"\t\tInject a fault into a particular device or the device's\n"
"\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
"\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
@@ -519,7 +517,7 @@ main(int argc, char **argv)
return (0);
}
- while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
+ while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
@@ -556,6 +554,9 @@ main(int argc, char **argv)
return (1);
}
break;
+ case 'F':
+ record.zi_failfast = B_TRUE;
+ break;
case 'h':
usage();
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 32caec6..c04102e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg)
if (svdcount == SPA_DVAS_PER_BP)
break;
}
- error = vdev_config_sync(svd, svdcount, txg);
+ error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
+ if (error != 0)
+ error = vdev_config_sync(svd, svdcount, txg,
+ B_TRUE);
} else {
error = vdev_config_sync(rvd->vdev_child,
- rvd->vdev_children, txg);
+ rvd->vdev_children, txg, B_FALSE);
+ if (error != 0)
+ error = vdev_config_sync(rvd->vdev_child,
+ rvd->vdev_children, txg, B_TRUE);
}
spa_config_exit(spa, SCL_STATE, FTAG);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
index b8313a9..9332554 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
+ boolean_t);
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
index 15a4a76..bf107d6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -118,7 +118,7 @@ typedef struct zinject_record {
uint32_t zi_error;
uint64_t zi_type;
uint32_t zi_freq;
- uint32_t zi_pad; /* pad out to 64 bit alignment */
+ uint32_t zi_failfast;
} zinject_record_t;
#define ZINJECT_NULL 0x1
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 109b64e..4a4d286 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -117,31 +117,33 @@ enum zio_compress {
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
#define ZIO_PRIORITY_TABLE_SIZE 11
-#define ZIO_FLAG_MUSTSUCCEED 0x00000
-#define ZIO_FLAG_CANFAIL 0x00001
-#define ZIO_FLAG_SPECULATIVE 0x00002
-#define ZIO_FLAG_CONFIG_WRITER 0x00004
-#define ZIO_FLAG_DONT_RETRY 0x00008
-
-#define ZIO_FLAG_DONT_CACHE 0x00010
-#define ZIO_FLAG_DONT_QUEUE 0x00020
-#define ZIO_FLAG_DONT_AGGREGATE 0x00040
-#define ZIO_FLAG_DONT_PROPAGATE 0x00080
-
-#define ZIO_FLAG_IO_BYPASS 0x00100
-#define ZIO_FLAG_IO_REPAIR 0x00200
-#define ZIO_FLAG_IO_RETRY 0x00400
-#define ZIO_FLAG_IO_REWRITE 0x00800
-
-#define ZIO_FLAG_SELF_HEAL 0x01000
-#define ZIO_FLAG_RESILVER 0x02000
-#define ZIO_FLAG_SCRUB 0x04000
-#define ZIO_FLAG_SCRUB_THREAD 0x08000
-
-#define ZIO_FLAG_PROBE 0x10000
-#define ZIO_FLAG_GANG_CHILD 0x20000
-#define ZIO_FLAG_RAW 0x40000
-#define ZIO_FLAG_GODFATHER 0x80000
+#define ZIO_FLAG_MUSTSUCCEED 0x000000
+#define ZIO_FLAG_CANFAIL 0x000001
+#define ZIO_FLAG_SPECULATIVE 0x000002
+#define ZIO_FLAG_CONFIG_WRITER 0x000004
+#define ZIO_FLAG_DONT_RETRY 0x000008
+
+#define ZIO_FLAG_DONT_CACHE 0x000010
+#define ZIO_FLAG_DONT_QUEUE 0x000020
+#define ZIO_FLAG_DONT_AGGREGATE 0x000040
+#define ZIO_FLAG_DONT_PROPAGATE 0x000080
+
+#define ZIO_FLAG_IO_BYPASS 0x000100
+#define ZIO_FLAG_IO_REPAIR 0x000200
+#define ZIO_FLAG_IO_RETRY 0x000400
+#define ZIO_FLAG_IO_REWRITE 0x000800
+
+#define ZIO_FLAG_SELF_HEAL 0x001000
+#define ZIO_FLAG_RESILVER 0x002000
+#define ZIO_FLAG_SCRUB 0x004000
+#define ZIO_FLAG_SCRUB_THREAD 0x008000
+
+#define ZIO_FLAG_PROBE 0x010000
+#define ZIO_FLAG_GANG_CHILD 0x020000
+#define ZIO_FLAG_RAW 0x040000
+#define ZIO_FLAG_GODFATHER 0x080000
+
+#define ZIO_FLAG_TRYHARD 0x100000
#define ZIO_FLAG_GANG_INHERIT \
(ZIO_FLAG_CANFAIL | \
@@ -159,7 +161,8 @@ enum zio_compress {
(ZIO_FLAG_GANG_INHERIT | \
ZIO_FLAG_IO_REPAIR | \
ZIO_FLAG_IO_RETRY | \
- ZIO_FLAG_PROBE)
+ ZIO_FLAG_PROBE | \
+ ZIO_FLAG_TRYHARD)
#define ZIO_FLAG_AGG_INHERIT \
(ZIO_FLAG_DONT_AGGREGATE | \
@@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen,
struct zinject_record *record);
extern int zio_clear_fault(int id);
extern int zio_handle_fault_injection(zio_t *zio, int error);
-extern int zio_handle_device_injection(vdev_t *vd, int error);
+extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
#ifdef __cplusplus
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 82006fc..cb43af3 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
- ZIO_FLAG_DONT_RETRY;
+ ZIO_FLAG_TRYHARD;
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
/*
@@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd)
error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
if (zio_injection_enabled && error == 0)
- error = zio_handle_device_injection(vd, ENXIO);
+ error = zio_handle_device_injection(vd, NULL, ENXIO);
if (error) {
if (vd->vdev_removed &&
@@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
if (flags & ZIO_FLAG_SPECULATIVE)
return;
+ /*
+ * If this is an I/O error that is going to be retried, then ignore the
+ * error. Otherwise, the user may interpret B_FAILFAST I/O errors as
+ * hard errors, when in reality they can happen for any number of
+ * innocuous reasons (bus resets, MPxIO link failure, etc).
+ */
+ if (zio->io_error == EIO &&
+ !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+ return;
+
mutex_enter(&vd->vdev_stat_lock);
if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
if (zio->io_error == ECKSUM)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
index e6d5743..5db7a6a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
@@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio)
bioinit(bp);
bp->b_flags = B_BUSY | B_NOCACHE |
- (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) |
- ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST);
+ (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
+ if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
+ bp->b_flags |= B_FAILFAST;
bp->b_bcount = zio->io_size;
bp->b_un.b_addr = zio->io_data;
bp->b_lblkno = lbtodb(zio->io_offset);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
index f1f3bb0..48d5fc2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
@@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd)
nvlist_t *config = NULL;
vdev_phys_t *vp;
zio_t *zio;
- int flags =
- ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE;
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd)
vp = zio_buf_alloc(sizeof (vdev_phys_t));
+retry:
for (int l = 0; l < VDEV_LABELS; l++) {
zio = zio_root(spa, NULL, NULL, flags);
@@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd)
}
}
+ if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
+ flags |= ZIO_FLAG_TRYHARD;
+ goto retry;
+ }
+
zio_buf_free(vp, sizeof (vdev_phys_t));
return (config);
@@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
/*
* Write everything in parallel.
*/
+retry:
zio = zio_root(spa, NULL, NULL, flags);
for (int l = 0; l < VDEV_LABELS; l++) {
@@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
error = zio_wait(zio);
+ if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
+ flags |= ZIO_FLAG_TRYHARD;
+ goto retry;
+ }
+
nvlist_free(label);
zio_buf_free(pad2, VDEV_PAD_SIZE);
zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
@@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
{
spa_t *spa = vd->vdev_spa;
vdev_t *rvd = spa->spa_root_vdev;
- int flags =
- ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
if (vd == rvd) {
ASSERT(zio == NULL);
@@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
* at any time, you can just call it again, and it will resume its work.
*/
int
-vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
+vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
{
spa_t *spa = svd[0]->vdev_spa;
uberblock_t *ub = &spa->spa_uberblock;
@@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
int error;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+ /*
+ * Normally, we don't want to try too hard to write every label and
+ * uberblock. If there is a flaky disk, we don't want the rest of the
+ * sync process to block while we retry. But if we can't write a
+ * single label out, we should retry with ZIO_FLAG_TRYHARD before
+ * bailing out and declaring the pool faulted.
+ */
+ if (tryhard)
+ flags |= ZIO_FLAG_TRYHARD;
+
ASSERT(ub->ub_txg <= txg);
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
index cf2f90d..9d6cefb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
@@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
return;
+ /*
+ * If this I/O is not a retry I/O, don't post an ereport.
+ * Otherwise, we risk making bad diagnoses based on B_FAILFAST
+ * I/Os.
+ */
+ if (zio->io_error == EIO &&
+ !(zio->io_flags & ZIO_FLAG_IO_RETRY))
+ return;
+
if (vd != NULL) {
/*
* If the vdev has already been marked as failing due
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 8ddf7cd..b477563 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio)
vdev_cache_write(zio);
if (zio_injection_enabled && zio->io_error == 0)
- zio->io_error = zio_handle_device_injection(vd, EIO);
+ zio->io_error = zio_handle_device_injection(vd,
+ zio, EIO);
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_label_injection(zio, EIO);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
index b3469fd..f8e6880 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, int error)
int
-zio_handle_device_injection(vdev_t *vd, int error)
+zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
{
inject_handler_t *handler;
int ret = 0;
@@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, int error)
continue;
if (vd->vdev_guid == handler->zi_record.zi_guid) {
+ if (handler->zi_record.zi_failfast &&
+ (zio == NULL || (zio->io_flags &
+ (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
+ continue;
+ }
+
if (handler->zi_record.zi_error == error) {
/*
* For a failed open, pretend like the device
OpenPOWER on IntegriCloud