From 13cb1e663613d1f1c3bd34240c0ac5703b2bff01 Mon Sep 17 00:00:00 2001 From: delphij Date: Fri, 5 Jul 2013 04:08:45 +0000 Subject: [PATCH] MFC r251646 + r252219: MFV r251644: Poor ZFS send / receive performance due to snapshot hold / release processing (by smh@) Illumos ZFS issues: 3740 Poor ZFS send / receive performance due to snapshot hold / release processing MFV r252215: Restore a previous behavior before r251646, where when destructing ZFS snapshot, the ioctl would return ENOENT when it hit any of them in the errlist (the new behavior was only return ENOENT when all returns error). Illumos ZFS issues: 3829 fix for 3740 changed behavior of zfs destroy/hold/release ioctl git-svn-id: svn://svn.freebsd.org/base/stable/9@252764 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 4 +- cddl/contrib/opensolaris/cmd/zhack/zhack.c | 3 +- cddl/contrib/opensolaris/cmd/ztest/ztest.c | 3 +- .../opensolaris/lib/libzfs/common/libzfs.h | 4 +- .../lib/libzfs/common/libzfs_dataset.c | 116 ++-- .../lib/libzfs/common/libzfs_sendrecv.c | 146 ++--- .../lib/libzfs_core/common/libzfs_core.c | 34 +- .../uts/common/fs/zfs/dsl_destroy.c | 2 + .../opensolaris/uts/common/fs/zfs/dsl_pool.c | 18 +- .../uts/common/fs/zfs/dsl_userhold.c | 583 +++++++++++------- .../uts/common/fs/zfs/sys/dsl_dataset.h | 3 +- .../uts/common/fs/zfs/sys/dsl_userhold.h | 4 +- .../opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 15 +- 13 files changed, 543 insertions(+), 392 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c index 97ae0bdd5..3baf47ae0 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c @@ -28,6 +28,7 @@ * Copyright (c) 2011-2012 Pawel Jakub Dawidek . * All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -5222,8 +5223,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) continue; } if (holding) { - if (zfs_hold(zhp, delim+1, tag, recursive, - B_FALSE, -1) != 0) + if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) diff --git a/cddl/contrib/opensolaris/cmd/zhack/zhack.c b/cddl/contrib/opensolaris/cmd/zhack/zhack.c index d80b3a013..1eb87136f 100644 --- a/cddl/contrib/opensolaris/cmd/zhack/zhack.c +++ b/cddl/contrib/opensolaris/cmd/zhack/zhack.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -153,7 +154,7 @@ import_pool(const char *target, boolean_t readonly) g_importargs.poolname = g_pool; pools = zpool_search_import(g_zfs, &g_importargs); - if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) { + if (nvlist_empty(pools)) { if (!g_importargs.can_be_active) { g_importargs.can_be_active = B_TRUE; if (zpool_search_import(g_zfs, &g_importargs) != NULL || diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c index 94a5bd084..eabdd4ed2 100644 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c @@ -23,6 +23,7 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -4713,7 +4714,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) error = user_release_one(fullname, tag); if (error) - fatal(0, "user_release_one(%s)", fullname, tag); + fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index 274b47b55..981e9bd9e 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -27,6 +27,7 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _LIBZFS_H @@ -611,7 +612,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *, extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, - boolean_t, boolean_t, int); + boolean_t, int); +extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c index b06a78ded..7ffbf56bb 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c @@ -21,12 +21,13 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Pawel Jakub Dawidek . * All rights reserved. * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -3158,18 +3159,14 @@ static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) verify(nvlist_add_boolean(dd->nvl, name) == 0); - zfs_close(szhp); - } rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); @@ -3189,7 +3186,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); @@ -3214,7 +3211,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) if (ret == 0) return (0); - if (nvlist_next_nvpair(errlist, NULL) == NULL) { + if (nvlist_empty(errlist)) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); @@ -4162,24 +4159,21 @@ struct holdarg { const char *snapname; const char *tag; boolean_t recursive; + int error; }; static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); - zfs_close(szhp); - } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); @@ -4189,14 +4183,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg) int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) + boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; - nvlist_t *errors; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - nvpair_t *elem; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; @@ -4204,26 +4194,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { + char errbuf[1024]; + fnvlist_free(ha.nvl); ret = ENOENT; - if (!enoent_ok) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s@%s'"), - zhp->zfs_name, snapname); - (void) zfs_standard_error(hdl, ret, errbuf); - } + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } - ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); - if (ret == 0) + return (ret); +} + +int +zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) +{ + int ret; + nvlist_t *errors; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; + + errors = NULL; + ret = lzc_hold(holds, cleanup_fd, &errors); + + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); @@ -4263,10 +4271,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; - case ENOENT: - if (enoent_ok) - return (ENOENT); - /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); @@ -4277,30 +4281,26 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, return (ret); } -struct releasearg { - nvlist_t *nvl; - const char *snapname; - const char *tag; - boolean_t recursive; -}; - static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; + nvlist_t *existing_holds; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { - nvlist_t *holds = fnvlist_alloc(); - fnvlist_add_boolean(holds, ha->tag); - fnvlist_add_nvlist(ha->nvl, name, holds); - zfs_close(szhp); + if (lzc_get_holds(name, &existing_holds) != 0) { + ha->error = ENOENT; + } else if (!nvlist_exists(existing_holds, ha->tag)) { + ha->error = ESRCH; + } else { + nvlist_t *torelease = fnvlist_alloc(); + fnvlist_add_boolean(torelease, ha->tag); + fnvlist_add_nvlist(ha->nvl, name, torelease); + fnvlist_free(torelease); } if (ha->recursive) @@ -4315,7 +4315,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, { int ret; struct holdarg ha; - nvlist_t *errors; + nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4324,26 +4324,34 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, ha.snapname = snapname; ha.tag = tag; ha.recursive = recursive; + ha.error = 0; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); - ret = ENOENT; + ret = ha.error; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release hold from snapshot '%s@%s'"), zhp->zfs_name, snapname); - (void) zfs_standard_error(hdl, ret, errbuf); + if (ret == ESRCH) { + (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); + } else { + (void) zfs_standard_error(hdl, ret, errbuf); + } return (ret); } ret = lzc_release(ha.nvl, &errors); fnvlist_free(ha.nvl); - if (ret == 0) + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c index 34cb648f6..d0eac1201 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -25,6 +25,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -802,6 +803,7 @@ typedef struct send_dump_data { int outfd; boolean_t err; nvlist_t *fss; + nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; @@ -952,41 +954,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, return (0); } -static int -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) +static void +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { - zfs_handle_t *pzhp; - int error = 0; - char *thissnap; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - if (sdd->dryrun) - return (0); - /* - * zfs_send() only opens a cleanup_fd for sends that need it, + * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ - if (sdd->cleanup_fd == -1) - return (0); - - thissnap = strchr(zhp->zfs_name, '@') + 1; - *(thissnap - 1) = '\0'; - pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); - *(thissnap - 1) = '@'; - - /* - * It's OK if the parent no longer exists. The send code will - * handle that error. - */ - if (pzhp) { - error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, sdd->cleanup_fd); - zfs_close(pzhp); - } + if (sdd->snapholds == NULL) + return; - return (error); + fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * @@ -1042,28 +1022,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; - char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; + err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { - err = hold_for_send(zhp, sdd); - if (err == 0) { - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, - ZFS_PROP_OBJSETID); - } else if (err == ENOENT) { - err = 0; - } + gather_holds(zhp, sdd); + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); - return (err); + return (0); } if (sdd->seento || !sdd->seenfrom) { @@ -1114,14 +1089,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) return (0); } - err = hold_for_send(zhp, sdd); - if (err) { - if (err == ENOENT) - err = 0; - zfs_close(zhp); - return (err); - } - + gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); @@ -1389,7 +1357,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; - pthread_t tid; + pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; @@ -1462,11 +1430,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, *debugnvp = hdrnv; else nvlist_free(hdrnv); - if (err) { - fsavl_destroy(fsavl); - nvlist_free(fss); + if (err) goto stderr_out; - } } if (!flags->dryrun) { @@ -1490,8 +1455,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } free(packbuf); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1502,8 +1465,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1515,7 +1476,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags->dedup) + if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; @@ -1552,36 +1513,71 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, err = errno; goto stderr_out; } + sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; + sdd.snapholds = NULL; } - if (flags->verbose) { + if (flags->verbose || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output - * before generating any data. Then do a non-verbose real - * run to generate the streams. + * or to gather snapshot hold's before generating any data, + * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); - sdd.dryrun = flags->dryrun; - sdd.verbose = B_FALSE; - if (flags->parsable) { - (void) fprintf(stderr, "size\t%llu\n", - (longlong_t)sdd.size); - } else { - char buf[16]; - zfs_nicenum(sdd.size, buf, sizeof (buf)); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); + + if (err != 0) + goto stderr_out; + + if (flags->verbose) { + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } + + /* Ensure no snaps found is treated as an error. */ + if (!sdd.seento) { + err = ENOENT; + goto err_out; } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; } + err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags->dedup) { - (void) close(pipefd[0]); + /* Ensure no snaps found is treated as an error. */ + if (err == 0 && !sdd.seento) + err = ENOENT; + + if (tid != 0) { + if (err != 0) + (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); } if (sdd.cleanup_fd != -1) { @@ -1609,9 +1605,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags->dedup) { + if (tid != 0) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c index ab23aa1ad..c5c3b0e23 100644 --- a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c +++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -333,7 +334,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) nvlist_free(args); return (error); - } int @@ -393,11 +393,17 @@ lzc_exists(const char *dataset) * uncleanly, the holds will be released when the pool is next opened * or imported. * - * The return value will be 0 if all holds were created. Otherwise the return - * value will be the errno of a (unspecified) hold that failed, no holds will - * be created, and the errlist will have an entry for each hold that - * failed (name = snapshot). The value in the errlist will be the error - * code (int32). + * Holds for snapshots which don't exist will be skipped and have an entry + * added to errlist, but will not cause an overall failure. + * + * The return value will be 0 if all holds, for snapshots that existed, + * were succesfully created. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed and no holds will be created. + * + * In all cases the errlist will have an entry for each hold that failed + * (name = snapshot), with its value being the error code (int32). */ int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) @@ -434,11 +440,17 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) * The snapshots must all be in the same pool. * The value is a nvlist whose keys are the holds to remove. * - * The return value will be 0 if all holds were removed. - * Otherwise the return value will be the errno of a (unspecified) release - * that failed, no holds will be released, and the errlist will have an - * entry for each snapshot that has failed releases (name = snapshot). - * The value in the errlist will be the error code (int32) of a failed release. + * Holds which failed to release because they didn't exist will have an entry + * added to errlist, but will not cause an overall failure. + * + * The return value will be 0 if the nvl holds was empty or all holds that + * existed, were successfully removed. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed to release and no holds will be released. + * + * In all cases the errlist will have an entry for each hold that failed to + * to release. */ int lzc_release(nvlist_t *holds, nvlist_t **errlist) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c index efa55408f..f968215db 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -127,6 +128,7 @@ dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); if (pair != NULL) return (fnvpair_value_int32(pair)); + return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index cc2631e5a..d7949e80c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -856,23 +857,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; + nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + holds = fnvlist_alloc(); + for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; - uint64_t dsobj; + nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; - dsobj = strtonum(za.za_name, NULL); - dsl_dataset_user_release_tmp(dp, dsobj, htag); + if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(holds, za.za_name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } + dsl_dataset_user_release_tmp(dp, holds); + fnvlist_free(holds); zap_cursor_fini(&zc); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c index 568bba33b..a948e10b1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include @@ -37,6 +38,7 @@ typedef struct dsl_dataset_user_hold_arg { nvlist_t *dduha_holds; + nvlist_t *dduha_chkholds; nvlist_t *dduha_errlist; minor_t dduha_minor; } dsl_dataset_user_hold_arg_t; @@ -53,25 +55,24 @@ dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag, objset_t *mos = dp->dp_meta_objset; int error = 0; + ASSERT(dsl_pool_config_held(dp)); + if (strlen(htag) > MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* Tempholds have a more restricted length */ if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* tags must be unique (if ds already exists) */ - if (ds != NULL) { - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj != 0) { - uint64_t value; - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, - htag, 8, 1, &value); - if (error == 0) - error = SET_ERROR(EEXIST); - else if (error == ENOENT) - error = 0; - } - mutex_exit(&ds->ds_lock); + if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) { + uint64_t value; + + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + htag, 8, 1, &value); + if (error == 0) + error = SET_ERROR(EEXIST); + else if (error == ENOENT) + error = 0; } return (error); @@ -82,52 +83,63 @@ dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - int rv = 0; if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { - int error = 0; + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); + pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { dsl_dataset_t *ds; - char *htag; + int error = 0; + char *htag, *name; /* must be a snapshot */ - if (strchr(nvpair_name(pair), '@') == NULL) + name = nvpair_name(pair); + if (strchr(name, '@') == NULL) error = SET_ERROR(EINVAL); if (error == 0) error = nvpair_value_string(pair, &htag); - if (error == 0) { - error = dsl_dataset_hold(dp, - nvpair_name(pair), FTAG, &ds); - } + + if (error == 0) + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error == 0) { error = dsl_dataset_user_hold_check_one(ds, htag, dduha->dduha_minor != 0, tx); dsl_dataset_rele(ds, FTAG); } - if (error != 0) { - rv = error; - fnvlist_add_int32(dduha->dduha_errlist, - nvpair_name(pair), error); + if (error == 0) { + fnvlist_add_string(dduha->dduha_chkholds, name, htag); + } else { + /* + * We register ENOENT errors so they can be correctly + * reported if needed, such as when all holds fail. + */ + fnvlist_add_int32(dduha->dduha_errlist, name, error); + if (error != ENOENT) + return (error); } } - return (rv); + + return (0); } -void -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, - minor_t minor, uint64_t now, dmu_tx_t *tx) + +static void +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, + const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - mutex_enter(&ds->ds_lock); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + if (ds->ds_phys->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create @@ -140,14 +152,26 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, zapobj = ds->ds_phys->ds_userrefs_obj; } ds->ds_userrefs++; - mutex_exit(&ds->ds_lock); VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx)); if (minor != 0) { + char name[MAXNAMELEN]; + nvlist_t *tags; + VERIFY0(dsl_pool_user_hold(dp, ds->ds_object, htag, now, tx)); - dsl_register_onexit_hold_cleanup(ds, htag, minor); + (void) snprintf(name, sizeof (name), "%llx", + (u_longlong_t)ds->ds_object); + + if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(tmpholds, name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } spa_history_log_internal_ds(ds, "hold", tx, @@ -155,140 +179,292 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, htag, minor != 0, ds->ds_userrefs); } +typedef struct zfs_hold_cleanup_arg { + char zhca_spaname[MAXNAMELEN]; + uint64_t zhca_spa_load_guid; + nvlist_t *zhca_holds; +} zfs_hold_cleanup_arg_t; + +static void +dsl_dataset_user_release_onexit(void *arg) +{ + zfs_hold_cleanup_arg_t *ca = arg; + spa_t *spa; + int error; + + error = spa_open(ca->zhca_spaname, &spa, FTAG); + if (error != 0) { + zfs_dbgmsg("couldn't release holds on pool=%s " + "because pool is no longer loaded", + ca->zhca_spaname); + return; + } + if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { + zfs_dbgmsg("couldn't release holds on pool=%s " + "because pool is no longer loaded (guid doesn't match)", + ca->zhca_spaname); + spa_close(spa, FTAG); + return; + } + + (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds); + fnvlist_free(ca->zhca_holds); + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + spa_close(spa, FTAG); +} + +static void +dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor) +{ + zfs_hold_cleanup_arg_t *ca; + + if (minor == 0 || nvlist_empty(holds)) { + fnvlist_free(holds); + return; + } + + ASSERT(spa != NULL); + ca = kmem_alloc(sizeof (*ca), KM_SLEEP); + + (void) strlcpy(ca->zhca_spaname, spa_name(spa), + sizeof (ca->zhca_spaname)); + ca->zhca_spa_load_guid = spa_load_guid(spa); + ca->zhca_holds = holds; + VERIFY0(zfs_onexit_add_cb(minor, + dsl_dataset_user_release_onexit, ca, NULL)); +} + +void +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, + minor_t minor, uint64_t now, dmu_tx_t *tx) +{ + nvlist_t *tmpholds; + + if (minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx); + dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor); +} + static void dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; + nvlist_t *tmpholds; uint64_t now = gethrestime_sec(); - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + if (dduha->dduha_minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL); + pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) { dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); - dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair), - dduha->dduha_minor, now, tx); + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, + fnvpair_value_string(pair), dduha->dduha_minor, now, tx); dsl_dataset_rele(ds, FTAG); } + dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor); } /* + * The full semantics of this function are described in the comment above + * lzc_hold(). + * + * To summarize: * holds is nvl of snapname -> holdname * errlist will be filled in with snapname -> error - * if cleanup_minor is not 0, the holds will be temporary, cleaned up - * when the process exits. * - * if any fails, all will fail. + * The snaphosts must all be in the same pool. + * + * Holds for snapshots that don't exist will be skipped. + * + * If none of the snapshots for requested holds exist then ENOENT will be + * returned. + * + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned + * up when the process exits. + * + * On success all the holds, for snapshots that existed, will be created and 0 + * will be returned. + * + * On failure no holds will be created, the errlist will be filled in, + * and an errno will returned. + * + * In all cases the errlist will contain entries for holds where the snapshot + * didn't exist. */ int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) { dsl_dataset_user_hold_arg_t dduha; nvpair_t *pair; + int ret; pair = nvlist_next_nvpair(holds, NULL); if (pair == NULL) return (0); dduha.dduha_holds = holds; + dduha.dduha_chkholds = fnvlist_alloc(); dduha.dduha_errlist = errlist; dduha.dduha_minor = cleanup_minor; - return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, - dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds))); + ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, + dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)); + fnvlist_free(dduha.dduha_chkholds); + + return (ret); } +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag, + dsl_dataset_t **dsp); + typedef struct dsl_dataset_user_release_arg { + dsl_holdfunc_t *ddura_holdfunc; nvlist_t *ddura_holds; nvlist_t *ddura_todelete; nvlist_t *ddura_errlist; + nvlist_t *ddura_chkholds; } dsl_dataset_user_release_arg_t; +/* Place a dataset hold on the snapshot identified by passed dsobj string */ static int -dsl_dataset_user_release_check_one(dsl_dataset_t *ds, - nvlist_t *holds, boolean_t *todelete) +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag, + dsl_dataset_t **dsp) { - uint64_t zapobj; - nvpair_t *pair; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - int error; - int numholds = 0; + return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp)); +} - *todelete = B_FALSE; +static int +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura, + dsl_dataset_t *ds, nvlist_t *holds, const char *snapname) +{ + uint64_t zapobj; + nvlist_t *holds_found; + objset_t *mos; + int numholds; if (!dsl_dataset_is_snapshot(ds)) return (SET_ERROR(EINVAL)); + if (nvlist_empty(holds)) + return (0); + + numholds = 0; + mos = ds->ds_dir->dd_pool->dp_meta_objset; zapobj = ds->ds_phys->ds_userrefs_obj; - if (zapobj == 0) - return (SET_ERROR(ESRCH)); + holds_found = fnvlist_alloc(); - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { - /* Make sure the hold exists */ uint64_t tmp; - error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp); - if (error == ENOENT) - error = SET_ERROR(ESRCH); - if (error != 0) + int error; + const char *holdname = nvpair_name(pair); + + if (zapobj != 0) + error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp); + else + error = SET_ERROR(ENOENT); + + /* + * Non-existent holds are put on the errlist, but don't + * cause an overall failure. + */ + if (error == ENOENT) { + if (ddura->ddura_errlist != NULL) { + char *errtag = kmem_asprintf("%s#%s", + snapname, holdname); + fnvlist_add_int32(ddura->ddura_errlist, errtag, + ENOENT); + strfree(errtag); + } + continue; + } + + if (error != 0) { + fnvlist_free(holds_found); return (error); + } + + fnvlist_add_boolean(holds_found, holdname); numholds++; } if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && ds->ds_userrefs == numholds) { /* we need to destroy the snapshot as well */ - - if (dsl_dataset_long_held(ds)) + if (dsl_dataset_long_held(ds)) { + fnvlist_free(holds_found); return (SET_ERROR(EBUSY)); - *todelete = B_TRUE; + } + fnvlist_add_boolean(ddura->ddura_todelete, snapname); + } + + if (numholds != 0) { + fnvlist_add_nvlist(ddura->ddura_chkholds, snapname, + holds_found); } + fnvlist_free(holds_found); + return (0); } static int dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_user_release_arg_t *ddura = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - int rv = 0; + dsl_dataset_user_release_arg_t *ddura; + dsl_holdfunc_t *holdfunc; + dsl_pool_t *dp; if (!dmu_tx_is_syncing(tx)) return (0); - for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { - const char *name = nvpair_name(pair); + dp = dmu_tx_pool(tx); + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + ddura = arg; + holdfunc = ddura->ddura_holdfunc; + + for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { int error; dsl_dataset_t *ds; nvlist_t *holds; + const char *snapname = nvpair_name(pair); error = nvpair_value_nvlist(pair, &holds); if (error != 0) - return (SET_ERROR(EINVAL)); - - error = dsl_dataset_hold(dp, name, FTAG, &ds); + error = (SET_ERROR(EINVAL)); + else + error = holdfunc(dp, snapname, FTAG, &ds); if (error == 0) { - boolean_t deleteme; - error = dsl_dataset_user_release_check_one(ds, - holds, &deleteme); - if (error == 0 && deleteme) { - fnvlist_add_boolean(ddura->ddura_todelete, - name); - } + error = dsl_dataset_user_release_check_one(ddura, ds, + holds, snapname); dsl_dataset_rele(ds, FTAG); } if (error != 0) { if (ddura->ddura_errlist != NULL) { fnvlist_add_int32(ddura->ddura_errlist, - name, error); + snapname, error); } - rv = error; + /* + * Non-existent snapshots are put on the errlist, + * but don't cause an overall failure. + */ + if (error != ENOENT) + return (error); } } - return (rv); + + return (0); } static void @@ -297,22 +473,22 @@ dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds, { dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; - uint64_t zapobj; - int error; - nvpair_t *pair; - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { - ds->ds_userrefs--; - error = dsl_pool_user_release(dp, ds->ds_object, - nvpair_name(pair), tx); + int error; + const char *holdname = nvpair_name(pair); + + /* Remove temporary hold if one exists. */ + error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx); VERIFY(error == 0 || error == ENOENT); - zapobj = ds->ds_phys->ds_userrefs_obj; - VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx)); + + VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname, + tx)); + ds->ds_userrefs--; spa_history_log_internal_ds(ds, "release", tx, - "tag=%s refs=%lld", nvpair_name(pair), - (longlong_t)ds->ds_userrefs); + "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs); } } @@ -320,18 +496,22 @@ static void dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_user_release_arg_t *ddura = arg; + dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc; dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds, + pair)) { dsl_dataset_t *ds; + const char *name = nvpair_name(pair); + + VERIFY0(holdfunc(dp, name, FTAG, &ds)); - VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); dsl_dataset_user_release_sync_one(ds, fnvpair_value_nvlist(pair), tx); - if (nvlist_exists(ddura->ddura_todelete, - nvpair_name(pair))) { + if (nvlist_exists(ddura->ddura_todelete, name)) { ASSERT(ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)); @@ -342,161 +522,108 @@ dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) } /* + * The full semantics of this function are described in the comment above + * lzc_release(). + * + * To summarize: + * Releases holds specified in the nvl holds. + * * holds is nvl of snapname -> { holdname, ... } * errlist will be filled in with snapname -> error * - * if any fails, all will fail. + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots, + * otherwise they should be the names of shapshots. + * + * As a release may cause snapshots to be destroyed this trys to ensure they + * aren't mounted. + * + * The release of non-existent holds are skipped. + * + * At least one hold must have been released for the this function to succeed + * and return 0. */ -int -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) +static int +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, + dsl_pool_t *tmpdp) { dsl_dataset_user_release_arg_t ddura; nvpair_t *pair; + char *pool; int error; pair = nvlist_next_nvpair(holds, NULL); if (pair == NULL) return (0); + /* + * The release may cause snapshots to be destroyed; make sure they + * are not mounted. + */ + if (tmpdp != NULL) { + /* Temporary holds are specified by dsobj string. */ + ddura.ddura_holdfunc = dsl_dataset_hold_obj_string; + pool = spa_name(tmpdp->dp_spa); +#ifdef _KERNEL + dsl_pool_config_enter(tmpdp, FTAG); + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + dsl_dataset_t *ds; + + error = dsl_dataset_hold_obj_string(tmpdp, + nvpair_name(pair), FTAG, &ds); + if (error == 0) { + char name[MAXNAMELEN]; + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + (void) zfs_unmount_snap(name); + } + } + dsl_pool_config_exit(tmpdp, FTAG); +#endif + } else { + /* Non-temporary holds are specified by name. */ + ddura.ddura_holdfunc = dsl_dataset_hold; + pool = nvpair_name(pair); +#ifdef _KERNEL + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + (void) zfs_unmount_snap(nvpair_name(pair)); + } +#endif + } + ddura.ddura_holds = holds; ddura.ddura_errlist = errlist; ddura.ddura_todelete = fnvlist_alloc(); + ddura.ddura_chkholds = fnvlist_alloc(); - error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check, - dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds)); + error = dsl_sync_task(pool, dsl_dataset_user_release_check, + dsl_dataset_user_release_sync, &ddura, + fnvlist_num_pairs(holds)); fnvlist_free(ddura.ddura_todelete); - return (error); -} - -typedef struct dsl_dataset_user_release_tmp_arg { - uint64_t ddurta_dsobj; - nvlist_t *ddurta_holds; - boolean_t ddurta_deleteme; -} dsl_dataset_user_release_tmp_arg_t; - -static int -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_tmp_arg_t *ddurta = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *ds; - int error; - - if (!dmu_tx_is_syncing(tx)) - return (0); + fnvlist_free(ddura.ddura_chkholds); - error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds); - if (error) - return (error); - - error = dsl_dataset_user_release_check_one(ds, - ddurta->ddurta_holds, &ddurta->ddurta_deleteme); - dsl_dataset_rele(ds, FTAG); return (error); } -static void -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx) -{ - dsl_dataset_user_release_tmp_arg_t *ddurta = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *ds; - - VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds)); - dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx); - if (ddurta->ddurta_deleteme) { - ASSERT(ds->ds_userrefs == 0 && - ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)); - dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); - } - dsl_dataset_rele(ds, FTAG); -} - /* - * Called at spa_load time to release a stale temporary user hold. - * Also called by the onexit code. + * holds is nvl of snapname -> { holdname, ... } + * errlist will be filled in with snapname -> error */ -void -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag) -{ - dsl_dataset_user_release_tmp_arg_t ddurta; - dsl_dataset_t *ds; - int error; - -#ifdef _KERNEL - /* Make sure it is not mounted. */ - dsl_pool_config_enter(dp, FTAG); - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); - if (error == 0) { - char name[MAXNAMELEN]; - dsl_dataset_name(ds, name); - dsl_dataset_rele(ds, FTAG); - dsl_pool_config_exit(dp, FTAG); - (void) zfs_unmount_snap(name); - } else { - dsl_pool_config_exit(dp, FTAG); - } -#endif - - ddurta.ddurta_dsobj = dsobj; - ddurta.ddurta_holds = fnvlist_alloc(); - fnvlist_add_boolean(ddurta.ddurta_holds, htag); - - (void) dsl_sync_task(spa_name(dp->dp_spa), - dsl_dataset_user_release_tmp_check, - dsl_dataset_user_release_tmp_sync, &ddurta, 1); - fnvlist_free(ddurta.ddurta_holds); -} - -typedef struct zfs_hold_cleanup_arg { - char zhca_spaname[MAXNAMELEN]; - uint64_t zhca_spa_load_guid; - uint64_t zhca_dsobj; - char zhca_htag[MAXNAMELEN]; -} zfs_hold_cleanup_arg_t; - -static void -dsl_dataset_user_release_onexit(void *arg) +int +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) { - zfs_hold_cleanup_arg_t *ca = arg; - spa_t *spa; - int error; - - error = spa_open(ca->zhca_spaname, &spa, FTAG); - if (error != 0) { - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " - "because pool is no longer loaded", - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); - return; - } - if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " - "because pool is no longer loaded (guid doesn't match)", - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); - spa_close(spa, FTAG); - return; - } - - dsl_dataset_user_release_tmp(spa_get_dsl(spa), - ca->zhca_dsobj, ca->zhca_htag); - kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); - spa_close(spa, FTAG); + return (dsl_dataset_user_release_impl(holds, errlist, NULL)); } +/* + * holds is nvl of snapdsobj -> { holdname, ... } + */ void -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor) +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds) { - zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP); - spa_t *spa = dsl_dataset_get_spa(ds); - (void) strlcpy(ca->zhca_spaname, spa_name(spa), - sizeof (ca->zhca_spaname)); - ca->zhca_spa_load_guid = spa_load_guid(spa); - ca->zhca_dsobj = ds->ds_object; - (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag)); - VERIFY0(zfs_onexit_add_cb(minor, - dsl_dataset_user_release_onexit, ca, NULL)); + ASSERT(dp != NULL); + (void) dsl_dataset_user_release_impl(holds, NULL, dp); } int diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h index 6729f9f05..cbb6f8653 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -187,8 +188,6 @@ int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); -void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h index 56c6c8f47..071aeb86d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _SYS_DSL_USERHOLD_H @@ -43,8 +44,7 @@ int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist); int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist); int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl); -void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, - const char *htag); +void dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds); int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag, boolean_t temphold, struct dmu_tx *tx); void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 61a9e0f43..357b95491 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -28,6 +28,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -5078,20 +5079,6 @@ zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl) static int zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { - nvpair_t *pair; - int err; - - /* - * The release may cause the snapshot to be destroyed; make sure it - * is not mounted. - */ - for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(holds, pair)) { - err = zfs_unmount_snap(nvpair_name(pair)); - if (err != 0) - return (err); - } - return (dsl_dataset_user_release(holds, errlist)); } -- 2.45.0