summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authormm <mm@FreeBSD.org>2010-09-27 09:05:51 +0000
committermm <mm@FreeBSD.org>2010-09-27 09:05:51 +0000
commit06d7ad088338844d37a5a4328b1c4bb5de1a68a2 (patch)
tree51bc74c1fcdcb6583ce0325e215ee1938ced28f3 /sys
parent776968f31350da9df36c96599691f3e375ccfba4 (diff)
downloadFreeBSD-src-06d7ad088338844d37a5a4328b1c4bb5de1a68a2.zip
FreeBSD-src-06d7ad088338844d37a5a4328b1c4bb5de1a68a2.tar.gz
Enable offlining of log devices.
OpenSolaris revision and Bug IDs: 9701:cc5b64682e64 6803605 should be able to offline log devices 6726045 vdev_deflate_ratio is not set when offlining a log device 6599442 zpool import has faults in the display Approved by: delphij (mentor) Obtained from: OpenSolaris (Bug ID 6803605, 6726045, 6599442) MFC after: 3 weeks
Diffstat (limited to 'sys')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c36
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h1
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c105
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c76
6 files changed, 147 insertions, 75 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
index d11f106..50cc069 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c
@@ -351,7 +351,7 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
return;
/*
- * One block ("stumpy") can be allocated a long time ago; we
+ * One block ("stubby") can be allocated a long time ago; we
* want to visit that one because it has been allocated
* (on-disk) even if it hasn't been claimed (even though for
* plain scrub there's nothing to do to it).
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index da030f1..32caec6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -1110,6 +1110,33 @@ spa_check_removed(vdev_t *vd)
}
/*
+ * Load the slog device state from the config object since it's possible
+ * that the label does not contain the most up-to-date information.
+ */
+void
+spa_load_log_state(spa_t *spa)
+{
+ nvlist_t *nv, *nvroot, **child;
+ uint64_t is_log;
+ uint_t children, c;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
+ VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+ VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0);
+
+ for (c = 0; c < children; c++) {
+ vdev_t *tvd = rvd->vdev_child[c];
+
+ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+ &is_log) == 0 && is_log)
+ vdev_load_log_state(tvd, child[c]);
+ }
+ nvlist_free(nv);
+}
+
+/*
* Check for missing log devices
*/
int
@@ -1125,13 +1152,7 @@ spa_check_logs(spa_t *spa)
return (1);
}
break;
-
- case SPA_LOG_CLEAR:
- (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
- DS_FIND_CHILDREN);
- break;
}
- spa->spa_log_state = SPA_LOG_GOOD;
return (0);
}
@@ -1455,6 +1476,8 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
spa_config_exit(spa, SCL_ALL, FTAG);
}
+ spa_load_log_state(spa);
+
if (spa_check_logs(spa)) {
vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LOG);
@@ -1542,6 +1565,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
zil_claim, tx, DS_FIND_CHILDREN);
dmu_tx_commit(tx);
+ spa->spa_log_state = SPA_LOG_GOOD;
spa->spa_sync_on = B_TRUE;
txg_sync_start(spa->spa_dsl_pool);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
index 1406d15..93e4102 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
@@ -259,6 +259,7 @@ extern void vdev_remove_parent(vdev_t *cvd);
/*
* vdev sync load and sync
*/
+extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
extern void vdev_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
index e992f6a..efbf65e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
@@ -366,9 +366,9 @@ extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
+extern int zil_vdev_offline(char *osname, void *txarg);
extern int zil_claim(char *osname, void *txarg);
extern int zil_check_log_chain(char *osname, void *txarg);
-extern int zil_clear_log_chain(char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog);
extern int zil_is_committed(zilog_t *zilog);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 140deed..82006fc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -39,6 +39,7 @@
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/arc.h>
+#include <sys/zil.h>
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
@@ -765,6 +766,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
return (0);
+ /*
+ * Compute the raidz-deflation ratio. Note, we hard-code
+ * in 128k (1 << 17) because it is the current "typical" blocksize.
+ * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
+ * or we will inconsistently account for existing bp's.
+ */
+ vd->vdev_deflate_ratio = (1 << 17) /
+ (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
+
ASSERT(oldc <= newc);
if (vd->vdev_islog)
@@ -998,6 +1008,8 @@ vdev_open(vdev_t *vd)
vd->vdev_state == VDEV_STATE_OFFLINE);
vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
+ vd->vdev_cant_read = B_FALSE;
+ vd->vdev_cant_write = B_FALSE;
if (!vd->vdev_removed && vd->vdev_faulted) {
ASSERT(vd->vdev_children == 0);
@@ -1113,18 +1125,6 @@ vdev_open(vdev_t *vd)
}
/*
- * If this is a top-level vdev, compute the raidz-deflation
- * ratio. Note, we hard-code in 128k (1<<17) because it is the
- * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
- * changes, this algorithm must never change, or we will
- * inconsistently account for existing bp's.
- */
- if (vd->vdev_top == vd) {
- vd->vdev_deflate_ratio = (1<<17) /
- (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
- }
-
- /*
* If a leaf vdev has a DTL, and seems healthy, then kick off a
* resilver. But don't do this if we are doing a reopen for a scrub,
* since this would just restart the scrub we are already doing.
@@ -1937,7 +1937,8 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
int
vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
{
- vdev_t *vd;
+ vdev_t *vd, *tvd;
+ int error;
spa_vdev_state_enter(spa);
@@ -1947,34 +1948,58 @@ vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
if (!vd->vdev_ops->vdev_op_leaf)
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+ tvd = vd->vdev_top;
+
/*
* If the device isn't already offline, try to offline it.
*/
if (!vd->vdev_offline) {
/*
* If this device has the only valid copy of some data,
- * don't allow it to be offlined.
+ * don't allow it to be offlined. Log devices are always
+ * expendable.
*/
- if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
+ if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+ vdev_dtl_required(vd))
return (spa_vdev_state_exit(spa, NULL, EBUSY));
/*
* Offline this device and reopen its top-level vdev.
- * If this action results in the top-level vdev becoming
- * unusable, undo it and fail the request.
+ * If the top-level vdev is a log device then just offline
+ * it. Otherwise, if this action results in the top-level
+ * vdev becoming unusable, undo it and fail the request.
*/
vd->vdev_offline = B_TRUE;
- vdev_reopen(vd->vdev_top);
- if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
+ vdev_reopen(tvd);
+
+ if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
+ vdev_is_dead(tvd)) {
vd->vdev_offline = B_FALSE;
- vdev_reopen(vd->vdev_top);
+ vdev_reopen(tvd);
return (spa_vdev_state_exit(spa, NULL, EBUSY));
}
}
vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
- return (spa_vdev_state_exit(spa, vd, 0));
+ if (!tvd->vdev_islog || !vdev_is_dead(tvd))
+ return (spa_vdev_state_exit(spa, vd, 0));
+
+ (void) spa_vdev_state_exit(spa, vd, 0);
+
+ error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+ NULL, DS_FIND_CHILDREN);
+ if (error) {
+ (void) vdev_online(spa, guid, 0, NULL);
+ return (error);
+ }
+ /*
+ * If we successfully offlined the log device then we need to
+ * sync out the current txg so that the "stubby" block can be
+ * removed by zil_sync().
+ */
+ txg_wait_synced(spa->spa_dsl_pool, 0);
+ return (0);
}
/*
@@ -2279,6 +2304,7 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta,
* childrens', thus not accurate enough for us.
*/
ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
+ ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
vd->vdev_deflate_ratio;
@@ -2631,11 +2657,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
boolean_t
vdev_is_bootable(vdev_t *vd)
{
-#ifdef __FreeBSD_version
- return (B_TRUE);
-#else
- int c;
-
+#ifdef sun
if (!vd->vdev_ops->vdev_op_leaf) {
char *vdev_type = vd->vdev_ops->vdev_op_type;
@@ -2654,6 +2676,35 @@ vdev_is_bootable(vdev_t *vd)
if (!vdev_is_bootable(vd->vdev_child[c]))
return (B_FALSE);
}
+#endif /* sun */
return (B_TRUE);
-#endif
+}
+
+void
+vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
+{
+ uint_t c, children;
+ nvlist_t **child;
+ uint64_t val;
+ spa_t *spa = vd->vdev_spa;
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ vdev_load_log_state(vd->vdev_child[c], child[c]);
+ }
+
+ if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
+
+ /*
+ * It would be nice to call vdev_offline()
+ * directly but the pool isn't fully loaded and
+ * the txg threads have not been started yet.
+ */
+ spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
+ vd->vdev_offline = val;
+ vdev_reopen(vd->vdev_top);
+ spa_config_exit(spa, SCL_STATE_ALL, FTAG);
+ }
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
index 7839713..1dd8ea0 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -25,6 +25,7 @@
#include <sys/zfs_context.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/arc.h>
@@ -515,6 +516,13 @@ zil_claim(char *osname, void *txarg)
zilog = dmu_objset_zil(os);
zh = zil_header_in_syncing_context(zilog);
+ if (zilog->zl_spa->spa_log_state == SPA_LOG_CLEAR) {
+ if (!BP_IS_HOLE(&zh->zh_log))
+ zio_free_blk(zilog->zl_spa, &zh->zh_log, first_txg);
+ BP_ZERO(&zh->zh_log);
+ dsl_dataset_dirty(dmu_objset_ds(os), tx);
+ }
+
/*
* Record here whether the zil has any records to replay.
* If the header block pointer is null or the block points
@@ -527,8 +535,10 @@ zil_claim(char *osname, void *txarg)
* Note, the intent log can be empty but still need the
* stubby to be claimed.
*/
- if (!zil_empty(zilog))
+ if (!zil_empty(zilog)) {
zh->zh_flags |= ZIL_REPLAY_NEEDED;
+ dsl_dataset_dirty(dmu_objset_ds(os), tx);
+ }
/*
* Claim all log blocks if we haven't already done so, and remember
@@ -597,36 +607,6 @@ zil_check_log_chain(char *osname, void *txarg)
return (error);
}
-/*
- * Clear a log chain
- */
-/* ARGSUSED */
-int
-zil_clear_log_chain(char *osname, void *txarg)
-{
- zilog_t *zilog;
- zil_header_t *zh;
- objset_t *os;
- dmu_tx_t *tx;
- int error;
-
- error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
- if (error) {
- cmn_err(CE_WARN, "can't open objset for %s", osname);
- return (0);
- }
-
- zilog = dmu_objset_zil(os);
- tx = dmu_tx_create(zilog->zl_os);
- (void) dmu_tx_assign(tx, TXG_WAIT);
- zh = zil_header_in_syncing_context(zilog);
- BP_ZERO(&zh->zh_log);
- dsl_dataset_dirty(dmu_objset_ds(os), tx);
- dmu_tx_commit(tx);
- dmu_objset_close(os);
- return (0);
-}
-
static int
zil_vdev_compare(const void *x1, const void *x2)
{
@@ -771,9 +751,9 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
}
if (lwb->lwb_zio == NULL) {
lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
- 0, &lwb->lwb_blk, lwb->lwb_buf,
- lwb->lwb_sz, zil_lwb_write_done, lwb,
- ZIO_PRIORITY_LOG_WRITE, ZIO_FLAG_CANFAIL, &zb);
+ 0, &lwb->lwb_blk, lwb->lwb_buf, lwb->lwb_sz,
+ zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb);
}
}
@@ -1270,12 +1250,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
}
}
- for (;;) {
- lwb = list_head(&zilog->zl_lwb_list);
- if (lwb == NULL) {
- mutex_exit(&zilog->zl_lock);
- return;
- }
+ while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
zh->zh_log = lwb->lwb_blk;
if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
break;
@@ -1692,3 +1667,24 @@ out:
mutex_exit(&zilog->zl_lock);
return (ret);
}
+
+/* ARGSUSED */
+int
+zil_vdev_offline(char *osname, void *arg)
+{
+ objset_t *os;
+ zilog_t *zilog;
+ int error;
+
+ error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+ if (error)
+ return (error);
+
+ zilog = dmu_objset_zil(os);
+ if (zil_suspend(zilog) != 0)
+ error = EEXIST;
+ else
+ zil_resume(zilog);
+ dmu_objset_close(os);
+ return (error);
+}
OpenPOWER on IntegriCloud