From c67ecba8966d9573318136a86a05a2a7d7e95ee9 Mon Sep 17 00:00:00 2001 From: delphij Date: Tue, 15 Jul 2014 05:36:26 +0000 Subject: [PATCH] MFC r268086: MFV r267570: 4756 metaslab_group_preload() could deadlock git-svn-id: svn://svn.freebsd.org/base/stable/10@268656 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- cddl/contrib/opensolaris/cmd/ztest/ztest.c | 4 ++- .../opensolaris/uts/common/fs/zfs/metaslab.c | 25 ++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c index c0ece9848..8f5d7ced9 100644 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. @@ -187,6 +187,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; extern uint64_t zfs_deadman_synctime_ms; +extern int metaslab_preload_limit; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -5596,6 +5597,7 @@ ztest_run(ztest_shared_t *zs) kernel_init(FREAD | FWRITE); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); spa->spa_debug = B_TRUE; + metaslab_preload_limit = ztest_random(20) + 1; ztest_spa = spa; VERIFY0(dmu_objset_own(ztest_opts.zo_pool, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index 407c6eaf4..d834e83fe 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -1299,6 +1299,8 @@ metaslab_preload(void *arg) metaslab_t *msp = arg; spa_t *spa = msp->ms_group->mg_vd->vdev_spa; + ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock)); + mutex_enter(&msp->ms_lock); metaslab_load_wait(msp); if (!msp->ms_loaded) @@ -1323,19 +1325,36 @@ metaslab_group_preload(metaslab_group_t *mg) taskq_wait(mg->mg_taskq); return; } - mutex_enter(&mg->mg_lock); + mutex_enter(&mg->mg_lock); /* - * Prefetch the next potential metaslabs + * Load the next potential metaslabs */ - for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) { + msp = avl_first(t); + while (msp != NULL) { + metaslab_t *msp_next = AVL_NEXT(t, msp); /* If we have reached our preload limit then we're done */ if (++m > metaslab_preload_limit) break; + /* + * We must drop the metaslab group lock here to preserve + * lock ordering with the ms_lock (when grabbing both + * the mg_lock and the ms_lock, the ms_lock must be taken + * first). As a result, it is possible that the ordering + * of the metaslabs within the avl tree may change before + * we reacquire the lock. The metaslab cannot be removed from + * the tree while we're in syncing context so it is safe to + * drop the mg_lock here. If the metaslabs are reordered + * nothing will break -- we just may end up loading a + * less than optimal one. + */ + mutex_exit(&mg->mg_lock); VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload, msp, TQ_SLEEP) != 0); + mutex_enter(&mg->mg_lock); + msp = msp_next; } mutex_exit(&mg->mg_lock); } -- 2.45.0