summaryrefslogtreecommitdiffstats
path: root/sys/cddl
diff options
context:
space:
mode:
authordelphij <delphij@FreeBSD.org>2014-01-02 08:10:35 +0000
committerdelphij <delphij@FreeBSD.org>2014-01-02 08:10:35 +0000
commit37fa7d55544f4fadafd355039fdfcc7851a270b1 (patch)
treea9dd55df2744690a49f81fecb597eb0092cd576a /sys/cddl
parentf255244d0aa68ae44bd002d75471ad452862f5a4 (diff)
downloadFreeBSD-src-37fa7d55544f4fadafd355039fdfcc7851a270b1.zip
FreeBSD-src-37fa7d55544f4fadafd355039fdfcc7851a270b1.tar.gz
MFV r260155:
When we encounter an I/O error on a piece of metadata while deleting a file system or zvol, we don't update the bptree_entry_phys_t's bookmark. This would lead to double free of bp's which will lead to space map corruption. Instead of tolerating and allowing the corruption, panic immediately. See Illumos #4390 for more details. 4391 panic system rather than corrupting pool if we hit bug 4390 Illumos/illumos-gate@8b36997aa24d9817807faa4efa301ac9c07a2b78 MFC after: 2 weeks
Diffstat (limited to 'sys/cddl')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c27
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h1
5 files changed, 26 insertions, 9 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
index f70f23c..83f3658 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
@@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte;
+ int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
ASSERT(!free || i == ba.ba_phys->bt_begin);
@@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
if (err != 0)
break;
+ if (zfs_recover)
+ flags |= TRAVERSE_HARD;
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
- bte.be_birth_txg, &bte.be_zb,
- TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
+ bte.be_birth_txg, &bte.be_zb, flags,
bptree_visit_cb, &ba);
if (free) {
- ASSERT(err == 0 || err == ERESTART);
- if (err != 0) {
+ if (err == ERESTART) {
/* save bookmark for future resume */
ASSERT3U(bte.be_zb.zb_objset, ==,
ZB_DESTROYED_OBJSET);
@@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
dmu_write(os, obj, i * sizeof (bte),
sizeof (bte), &bte, tx);
break;
- } else {
- ba.ba_phys->bt_begin++;
- (void) dmu_free_range(os, obj,
- i * sizeof (bte), sizeof (bte), tx);
}
+ if (err != 0) {
+ /*
+ * We can not properly handle an i/o
+ * error, because the traversal code
+ * does not know how to resume from an
+ * arbitrary bookmark.
+ */
+ zfs_panic_recover("error %u from "
+ "traverse_dataset_destroyed()", err);
+ }
+
+ ba.ba_phys->bt_begin++;
+ (void) dmu_free_range(os, obj,
+ i * sizeof (bte), sizeof (bte), tx);
}
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 68bcac4..146aad1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -383,7 +383,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
(void) arc_buf_remove_ref(buf, &buf);
post:
- if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
+ if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
if (err == ERESTART)
pause = B_TRUE;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index 35e867d..c62be1a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -1348,6 +1348,9 @@ dsl_scan_free_should_pause(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
+ if (zfs_recover)
+ return (B_FALSE);
+
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 5c16233..dbcb729 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -252,6 +252,8 @@ SYSCTL_INT(_debug, OID_AUTO, zfs_flags, CTLFLAG_RWTUN, &zfs_flags, 0,
* zfs_recover can be set to nonzero to attempt to recover from
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
+ * This should only be used as a last resort, as it typically results
+ * in leaked space, or worse.
*/
int zfs_recover = 0;
SYSCTL_DECL(_vfs_zfs);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
index 36cb64a..f55251f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
@@ -49,6 +49,7 @@ extern "C" {
#endif
extern int zfs_flags;
+extern int zfs_recover;
#define ZFS_DEBUG_DPRINTF (1<<0)
#define ZFS_DEBUG_DBUF_VERIFY (1<<1)
OpenPOWER on IntegriCloud