author		pjd <pjd@FreeBSD.org>	2008-11-17 20:49:29 +0000
committer	pjd <pjd@FreeBSD.org>	2008-11-17 20:49:29 +0000
commit		bbe899b96e388a8b82439f81ed3707e0d9c6070d (patch)
tree		81b89fa4ac6467771d5aa291a97f4665981a6108	/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
parent		d2f579595c362ce27b4d87e2c40e1c4e09b929e3 (diff)
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This brings a huge number of changes; I'll enumerate only the user-visible ones:
- Delegated Administration: allows regular users to perform ZFS operations, such as file system creation, snapshot creation, etc.
- L2ARC: a level-2 cache for ZFS that allows additional disks to be used as cache. Huge performance improvements, mostly for random reads of largely static content.
- slog: allows additional disks to be used for the ZFS Intent Log, speeding up operations like fsync(2).
- vfs.zfs.super_owner: allows a regular user to perform privileged operations on files stored on ZFS file systems he owns. Be very careful with this one.
- chflags(2): not all flags are supported; this still needs work.
- ZFSBoot: support for booting off a ZFS pool. Not finished, AFAIK. Submitted by: dfr
- Snapshot properties.
- New failure modes: previously, if a write request failed, the system panicked. Now one can select one of three failure modes (see the sketch after this message): panic (panic on write error), wait (wait for the disk to reappear), or continue (serve read requests if possible, block write requests).
- Refquota and refreservation properties: like the quota and reservation properties, but they don't count space consumed by child file systems, clones, and snapshots.
- Sparse volumes: ZVOLs that don't reserve space in the pool.
- External attributes: compatible with extattr(2).
- NFSv4 ACLs: not sure about the status; it may not be complete yet. Submitted by: trasz
- Creation-time properties.
- Regression tests for the zpool(8) command.
Obtained from: OpenSolaris
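The new failure-mode behavior can be pictured as a small dispatch on the pool's failmode setting. The following is a minimal hypothetical sketch, not code from this commit: the helper name and its call site are invented, while spa_get_failmode() and the ZIO_FAILURE_MODE_* constants do appear in the diff below.

	#include <sys/spa.h>
	#include <sys/systm.h>
	#include <sys/zio.h>

	/* Hypothetical sketch of failure-mode dispatch on a failed write. */
	static int
	zfs_write_error_sketch(spa_t *spa, zio_t *zio)
	{
		switch (spa_get_failmode(spa)) {
		case ZIO_FAILURE_MODE_PANIC:
			/* Old behavior: any write failure panics the system. */
			panic("ZFS: write error on pool '%s'", spa_name(spa));
		case ZIO_FAILURE_MODE_WAIT:
			/* Suspend I/O until the disk reappears. */
			return (EAGAIN);
		case ZIO_FAILURE_MODE_CONTINUE:
		default:
			/* Serve reads if possible; block write requests. */
			return (zio->io_type == ZIO_TYPE_WRITE ? EIO : 0);
		}
	}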
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c')
-rw-r--r--	sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c	120
1 file changed, 77 insertions(+), 43 deletions(-)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
index e2385a0..17e4b0a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
@@ -53,7 +51,7 @@ extern void devctl_notify(const char *__system, const char *__subsystem,
* pool X
*
* If we are in a loading state, all errors are chained together by the same
- * SPA-wide ENA.
+ * SPA-wide ENA (Error Numeric Association).
*
* For isolated I/O requests, we get the ENA from the zio_t. The propagation
* gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want
@@ -90,11 +88,10 @@ extern void devctl_notify(const char *__system, const char *__subsystem,
* We keep track of the ENA for a ZIO chain through the 'io_logical' member.
* When a new logical I/O is issued, we set this to point to itself. Child I/Os
* then inherit this pointer, so that when it is first set subsequent failures
- * will use the same ENA. If a physical I/O is issued (by passing the
- * ZIO_FLAG_NOBOOKMARK flag), then this pointer is reset, guaranteeing that a
- * unique ENA will be generated. For an aggregate I/O, this pointer is set to
- * NULL, and no ereport will be generated (since it doesn't actually correspond
- * to any particular device or piece of data).
+ * will use the same ENA. For vdev cache fill and queue aggregation I/O,
+ * this pointer is set to NULL, and no ereport will be generated (since it
+ * doesn't actually correspond to any particular device or piece of data,
+ * and the caller will always retry without caching or queueing anyway).
*/
void
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
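The comment above describes how the 'io_logical' pointer threads a single ENA through a chain of I/Os. As a minimal hypothetical sketch of that rule (the helper and its boolean parameter are invented; io_logical is the field named in the comment):

	#include <sys/zio.h>

	/* Hypothetical sketch of io_logical propagation; not committed code. */
	static void
	zio_set_logical_sketch(zio_t *zio, zio_t *pio, boolean_t aggregation)
	{
		if (aggregation)
			zio->io_logical = NULL;	/* cache-fill/aggregate I/O: no ereport */
		else if (pio != NULL)
			zio->io_logical = pio->io_logical; /* child inherits the chain's ENA source */
		else
			zio->io_logical = zio;	/* new logical I/O roots its own chain */
	}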
@@ -104,6 +101,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
char buf[1024];
struct sbuf sb;
struct timespec ts;
+ int state;
/*
* If we are doing a spa_tryimport(), ignore errors.
@@ -120,21 +118,33 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
spa->spa_last_open_failed)
return;
- /*
- * Ignore any errors from I/Os that we are going to retry anyway - we
- * only generate errors from the final failure.
- */
- if (zio && zio_should_retry(zio))
- return;
+ if (zio != NULL) {
+ /*
+ * If this is not a read or write zio, ignore the error. This
+ * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
+ */
+ if (zio->io_type != ZIO_TYPE_READ &&
+ zio->io_type != ZIO_TYPE_WRITE)
+ return;
- /*
- * If this is not a read or write zio, ignore the error. This can occur
- * if the DKIOCFLUSHWRITECACHE ioctl fails.
- */
- if (zio && zio->io_type != ZIO_TYPE_READ &&
- zio->io_type != ZIO_TYPE_WRITE)
- return;
+ /*
+ * Ignore any errors from speculative I/Os, as failure is an
+ * expected result.
+ */
+ if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
+ return;
+ /*
+ * If the vdev has already been marked as failing due to a
+ * failed probe, then ignore any subsequent I/O errors, as the
+ * DE will automatically fault the vdev on the first such
+ * failure.
+ */
+ if (vd != NULL &&
+ (!vdev_readable(vd) || !vdev_writeable(vd)) &&
+ strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) != 0)
+ return;
+ }
nanotime(&ts);
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
@@ -187,22 +197,28 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
*/
/*
+ * If we are importing a faulted pool, then we treat it like an open,
+ * not an import. Otherwise, the DE will ignore all faults during
+ * import, since the default behavior is to mark the devices as
+ * persistently unavailable, not leave them in the faulted state.
+ */
+ state = spa->spa_import_faulted ? SPA_LOAD_OPEN : spa->spa_load_state;
+
+ /*
* Generic payload members common to all ereports.
- *
- * The direct reference to spa_name is used rather than spa_name()
- * because of the asynchronous nature of the zio pipeline. spa_name()
- * asserts that the config lock is held in some form. This is always
- * the case in I/O context, but because the check for RW_WRITER compares
- * against 'curthread', we may be in an asynchronous context and blow
- * this assert. Rather than loosen this assert, we acknowledge that all
- * contexts in which this function is called (pool open, I/O) are safe,
- * and dereference the name directly.
*/
- sbuf_printf(&sb, " %s=%s", FM_EREPORT_PAYLOAD_ZFS_POOL, spa->spa_name);
+ sbuf_printf(&sb, " %s=%s", FM_EREPORT_PAYLOAD_ZFS_POOL, spa_name(spa));
sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
spa_guid(spa));
- sbuf_printf(&sb, " %s=%u", FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT,
- spa->spa_load_state);
+ sbuf_printf(&sb, " %s=%d", FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, state);
+
+ if (spa != NULL) {
+ sbuf_printf(&sb, " %s=%s", FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
+ FM_EREPORT_FAILMODE_WAIT :
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
+ FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC);
+ }
if (vd != NULL) {
vdev_t *pvd = vd->vdev_parent;
@@ -290,7 +306,6 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
mutex_exit(&spa->spa_errlist_lock);
sbuf_finish(&sb);
- ZFS_LOG(1, "%s", sbuf_data(&sb));
devctl_notify("ZFS", spa->spa_name, subclass, sbuf_data(&sb));
if (sbuf_overflowed(&sb))
printf("ZFS WARNING: sbuf overflowed\n");
@@ -298,13 +313,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
#endif
}
-/*
- * The 'resource.fs.zfs.ok' event is an internal signal that the associated
- * resource (pool or disk) has been identified by ZFS as healthy. This will
- * then trigger the DE to close the associated case, if any.
- */
-void
-zfs_post_ok(spa_t *spa, vdev_t *vd)
+static void
+zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
char buf[1024];
@@ -318,7 +328,7 @@ zfs_post_ok(spa_t *spa, vdev_t *vd)
sbuf_printf(&sb, "time=%ju.%ld", (uintmax_t)ts.tv_sec, ts.tv_nsec);
snprintf(class, sizeof(class), "%s.%s.%s", FM_RSRC_RESOURCE,
- ZFS_ERROR_CLASS, FM_RESOURCE_OK);
+ ZFS_ERROR_CLASS, name);
sbuf_printf(&sb, " %s=%hhu", FM_VERSION, FM_RSRC_VERSION);
sbuf_printf(&sb, " %s=%s", FM_CLASS, class);
sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
@@ -327,9 +337,33 @@ zfs_post_ok(spa_t *spa, vdev_t *vd)
sbuf_printf(&sb, " %s=%ju", FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
vd->vdev_guid);
sbuf_finish(&sb);
+ ZFS_LOG(1, "%s", sbuf_data(&sb));
devctl_notify("ZFS", spa->spa_name, class, sbuf_data(&sb));
if (sbuf_overflowed(&sb))
printf("ZFS WARNING: sbuf overflowed\n");
sbuf_delete(&sb);
#endif
}
+
+/*
+ * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev
+ * has been removed from the system. This will cause the DE to ignore any
+ * recent I/O errors, inferring that they are due to the asynchronous device
+ * removal.
+ */
+void
+zfs_post_remove(spa_t *spa, vdev_t *vd)
+{
+ zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
+}
+
+/*
+ * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool
+ * has the 'autoreplace' property set, and therefore any broken vdevs will be
+ * handled by higher level logic, and no vdev fault should be generated.
+ */
+void
+zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
+{
+ zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
+}
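A hedged usage sketch for the removal event (the call site below is illustrative only and not part of this commit; vdev_spa and vdev_remove_wanted are assumed vdev_t fields):

	#include <sys/spa.h>
	#include <sys/vdev_impl.h>

	/* Illustrative caller; not part of this diff. */
	static void
	vdev_orphan_sketch(vdev_t *vd)
	{
		/*
		 * The underlying device disappeared: flag the vdev and tell
		 * the DE that trailing I/O errors stem from removal, not a
		 * device fault.
		 */
		vd->vdev_remove_wanted = B_TRUE;
		zfs_post_remove(vd->vdev_spa, vd);
	}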