From 5ed46bbf4d6d6974af90514f084c262000274edf Mon Sep 17 00:00:00 2001 From: Xin LI Date: Wed, 1 Jan 2014 01:18:37 +0000 Subject: [PATCH] 4391 panic system rather than corrupting pool if we hit bug 4390 llumos/illumos-gate@8b36997aa24d9817807faa4efa301ac9c07a2b78 --- uts/common/fs/zfs/bptree.c | 27 +++++++++++++++++++-------- uts/common/fs/zfs/dmu_traverse.c | 2 +- uts/common/fs/zfs/dsl_scan.c | 3 +++ uts/common/fs/zfs/spa_misc.c | 2 ++ uts/common/fs/zfs/sys/zfs_debug.h | 1 + 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/uts/common/fs/zfs/bptree.c b/uts/common/fs/zfs/bptree.c index f70f23c698a..83f365864de 100644 --- a/uts/common/fs/zfs/bptree.c +++ b/uts/common/fs/zfs/bptree.c @@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, err = 0; for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { bptree_entry_phys_t bte; + int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; ASSERT(!free || i == ba.ba_phys->bt_begin); @@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, if (err != 0) break; + if (zfs_recover) + flags |= TRAVERSE_HARD; err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, - bte.be_birth_txg, &bte.be_zb, - TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST, + bte.be_birth_txg, &bte.be_zb, flags, bptree_visit_cb, &ba); if (free) { - ASSERT(err == 0 || err == ERESTART); - if (err != 0) { + if (err == ERESTART) { /* save bookmark for future resume */ ASSERT3U(bte.be_zb.zb_objset, ==, ZB_DESTROYED_OBJSET); @@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, dmu_write(os, obj, i * sizeof (bte), sizeof (bte), &bte, tx); break; - } else { - ba.ba_phys->bt_begin++; - (void) dmu_free_range(os, obj, - i * sizeof (bte), sizeof (bte), tx); } + if (err != 0) { + /* + * We can not properly handle an i/o + * error, because the traversal code + * does not know how to resume from an + * arbitrary bookmark. + */ + zfs_panic_recover("error %u from " + "traverse_dataset_destroyed()", err); + } + + ba.ba_phys->bt_begin++; + (void) dmu_free_range(os, obj, + i * sizeof (bte), sizeof (bte), tx); } } diff --git a/uts/common/fs/zfs/dmu_traverse.c b/uts/common/fs/zfs/dmu_traverse.c index 68bcac48844..146aad1fbc0 100644 --- a/uts/common/fs/zfs/dmu_traverse.c +++ b/uts/common/fs/zfs/dmu_traverse.c @@ -383,7 +383,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, (void) arc_buf_remove_ref(buf, &buf); post: - if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) { + if (err == 0 && (td->td_flags & TRAVERSE_POST)) { err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg); if (err == ERESTART) pause = B_TRUE; diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c index ee8f65cf7a0..f5e2ea825d4 100644 --- a/uts/common/fs/zfs/dsl_scan.c +++ b/uts/common/fs/zfs/dsl_scan.c @@ -1317,6 +1317,9 @@ dsl_scan_free_should_pause(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; + if (zfs_recover) + return (B_FALSE); + elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms && diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c index 27ea88cbb6a..fa0658493ea 100644 --- a/uts/common/fs/zfs/spa_misc.c +++ b/uts/common/fs/zfs/spa_misc.c @@ -247,6 +247,8 @@ int zfs_flags = 0; * zfs_recover can be set to nonzero to attempt to recover from * otherwise-fatal errors, typically caused by on-disk corruption. When * set, calls to zfs_panic_recover() will turn into warning messages. + * This should only be used as a last resort, as it typically results + * in leaked space, or worse. */ int zfs_recover = 0; diff --git a/uts/common/fs/zfs/sys/zfs_debug.h b/uts/common/fs/zfs/sys/zfs_debug.h index c4dcfaec651..e6a1fa28287 100644 --- a/uts/common/fs/zfs/sys/zfs_debug.h +++ b/uts/common/fs/zfs/sys/zfs_debug.h @@ -47,6 +47,7 @@ extern "C" { #endif extern int zfs_flags; +extern int zfs_recover; #define ZFS_DEBUG_DPRINTF (1<<0) #define ZFS_DEBUG_DBUF_VERIFY (1<<1) -- 2.45.0