From 89127898f0e9466fcbed3849d07298f745e46c95 Mon Sep 17 00:00:00 2001 From: avg Date: Mon, 17 Feb 2014 18:24:37 +0000 Subject: [PATCH] MFC r260185: MFV r260155: 4391 panic system rather than corrupting pool if we hit bug 4390 git-svn-id: svn://svn.freebsd.org/base/stable/8@262119 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- .../opensolaris/uts/common/fs/zfs/bptree.c | 27 +++++++++++++------ .../uts/common/fs/zfs/dmu_traverse.c | 2 +- .../opensolaris/uts/common/fs/zfs/dsl_scan.c | 3 +++ .../opensolaris/uts/common/fs/zfs/spa_misc.c | 2 ++ .../uts/common/fs/zfs/sys/zfs_debug.h | 1 + 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c index a0c90cc4d..41b166ca9 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c @@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, err = 0; for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { bptree_entry_phys_t bte; + int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; ASSERT(!free || i == ba.ba_phys->bt_begin); @@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, if (err != 0) break; + if (zfs_recover) + flags |= TRAVERSE_HARD; err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, - bte.be_birth_txg, &bte.be_zb, - TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST, + bte.be_birth_txg, &bte.be_zb, flags, bptree_visit_cb, &ba); if (free) { - ASSERT(err == 0 || err == ERESTART); - if (err != 0) { + if (err == ERESTART) { /* save bookmark for future resume */ ASSERT3U(bte.be_zb.zb_objset, ==, ZB_DESTROYED_OBJSET); @@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, dmu_write(os, obj, i * sizeof (bte), sizeof (bte), &bte, tx); break; - } else { - ba.ba_phys->bt_begin++; - (void) dmu_free_range(os, obj, - i * sizeof (bte), sizeof (bte), tx); } + if (err != 0) { + /* + * We can not properly handle an i/o + * error, because the traversal code + * does not know how to resume from an + * arbitrary bookmark. + */ + zfs_panic_recover("error %u from " + "traverse_dataset_destroyed()", err); + } + + ba.ba_phys->bt_begin++; + (void) dmu_free_range(os, obj, + i * sizeof (bte), sizeof (bte), tx); } } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c index a0ca63fb5..26bd28d3e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c @@ -361,7 +361,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, (void) arc_buf_remove_ref(buf, &buf); post: - if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) { + if (err == 0 && (td->td_flags & TRAVERSE_POST)) { err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg); if (err == ERESTART) pause = B_TRUE; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index 5472c3ae5..7e34b0be6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -1347,6 +1347,9 @@ dsl_scan_free_should_pause(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; + if (zfs_recover) + return (B_FALSE); + elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms && diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c index 28673cf7d..593ee7495 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c @@ -252,6 +252,8 @@ SYSCTL_INT(_debug, OID_AUTO, zfs_flags, CTLFLAG_RWTUN, &zfs_flags, 0, * zfs_recover can be set to nonzero to attempt to recover from * otherwise-fatal errors, typically caused by on-disk corruption. When * set, calls to zfs_panic_recover() will turn into warning messages. + * This should only be used as a last resort, as it typically results + * in leaked space, or worse. */ int zfs_recover = 0; SYSCTL_DECL(_vfs_zfs); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h index 36cb64ae5..f55251f85 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h @@ -49,6 +49,7 @@ extern "C" { #endif extern int zfs_flags; +extern int zfs_recover; #define ZFS_DEBUG_DPRINTF (1<<0) #define ZFS_DEBUG_DBUF_VERIFY (1<<1) -- 2.45.0