From 2094ce50e11c2838cd73ab107971259af40195f3 Mon Sep 17 00:00:00 2001 From: mm Date: Fri, 19 Apr 2013 09:19:10 +0000 Subject: [PATCH] MFC 248571,248976,249004,249042,249188,249195-249196,249206,249207,249319, 249326,249356-249357 Merge libzfs_core and other ZFS bugfixes and improvements. MFC r248571: MFV 238590, 238592: In the first zfs ioctl restructuring phase, the libzfs_core library was introduced. It is a new thin library that wraps around kernel ioctl's. The idea is to provide a forward-compatible way of dealing with new features. Arguments are passed in nvlists and not random zfs_cmd fields, new-style ioctls are logged to pool history using a new method of history logging. http://blog.delphix.com/matt/2012/01/17/the-future-of-libzfs/ MFV 247580 [1]: To address issues of several deadlocks and race conditions the locking code around dsl_dataset was rewritten and the interface to synctasks was changed. User-Visible Changes: "zfs snapshot" can create more arbitrary snapshots at once (atomically) "zfs destroy" destroys multiple snapshots at once "zfs recv" has improved performance Backward Compatibility: I have extended the compatibility layer to support full backward compatibility by remapping or rewriting the responsible ioctl arguments. Old utilities are fully supported by the new kernel module. Forward Compatibility: New utilities work with old kernels with the following restrictions: - creating, destroying, holding and releasing of multiple snapshots at once is not supported, this includes recursive (-r) commands Illumos ZFS issues: 2882 implement libzfs_core 2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots at once 3464 zfs synctask code needs restructuring MFC r248976: Call dmu_snapshot_list_next() in zvol.c with dsl_pool_config lock held MFC r249004: Do not check against uninitialized rc and comment out vendor code MFC r249042: Fix possible pool hold leak in dmu_send_impl() Illumos ZFS issues: 3645 dmu_send_impl: possibilty of pool hold leak MFC r249188: Import vendor change to reduce diff, no effect on FreeBSD. Illumos ZFS issues: 3517 importing pool with autoreplace=on and "hole" vdevs crashes syseventd MFC r249195: Merge change from vendor to reduce diff only. ZFS dtrace probes are not supported on FreeBSD yet. Illumos ZFS issues: 3598 want to dtrace when errors are generated in zfs MFC r249196: Provide a fix for kernel panic if receiving recursive deduplicated streams. Problem reported to vendor. Illumos ZFS issues: 3692 Panic on zfs receive of a recursive deduplicated stream MFC r249206: Merge vendor change - modify time processing in deadman thread. Illumos ZFS issues: 3618 ::zio dcmd does not show timestamp data MFC r249207: Allow zdb to output a histogram of compressed block sizes. Illumos ZFS issues: 3641 want a histogram of compressed block sizes MFC r249319: ZFS expects a copyout of zfs_cmd_t on an ioctl error. Our sys_ioctl() doesn't copyout in this case. To solve this a new struct zfs_iocparm_t is introduced consisting of: - zfs_ioctl_version (future backwards compatibility purposes) - user space pointer to zfs_cmd_t (copyin and copyout) - size of zfs_cmd_t (verification purposes) The copyin and copyout of zfs_cmd_t is now done the illumos (vendor) way what makes porting of new changes easier and ensures correct behavior if returning an error. MFC r249326: Cast (void *)(uintptr_t) on copyout and copyin of zfs_iocparm_t.zfs_cmd MFC r249356: Merge bugfixes accepted and integrated by vendor. Underlying problems have been reported by us and fixed in r240942 and r249196. Illumos ZFS issues: 3645 dmu_send_impl: possibilty of pool hold leak 3692 Panic on zfs receive of a recursive deduplicated stream MFC r249357: Fix libzfs to report error instead of returning zero if trying to hold or release a non-existing snapshot of a existing dataset. In recursive case error is reported if no snapshots with the requested name have been found. Illumos ZFS issues: 3699 zfs hold or release of a non-existent snapshot does not output error git-svn-id: svn://svn.freebsd.org/base/stable/9@249643 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- Makefile.inc1 | 3 + cddl/contrib/opensolaris/cmd/zdb/zdb.c | 133 +- cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 20 +- cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 217 +- cddl/contrib/opensolaris/cmd/zhack/zhack.c | 34 +- .../opensolaris/cmd/zpool/zpool_main.c | 171 +- cddl/contrib/opensolaris/cmd/ztest/ztest.c | 217 +- .../opensolaris/lib/libzfs/common/libzfs.h | 31 +- .../lib/libzfs/common/libzfs_compat.c | 105 + .../lib/libzfs/common/libzfs_compat.h | 44 + .../lib/libzfs/common/libzfs_dataset.c | 560 ++- .../lib/libzfs/common/libzfs_impl.h | 67 +- .../lib/libzfs/common/libzfs_iter.c | 8 +- .../lib/libzfs/common/libzfs_pool.c | 64 +- .../lib/libzfs/common/libzfs_sendrecv.c | 25 +- .../lib/libzfs/common/libzfs_util.c | 26 +- .../lib/libzfs_core/common/libzfs_core.c | 618 +++ .../lib/libzfs_core/common/libzfs_core.h | 67 + .../libzfs_core/common/libzfs_core_compat.c | 189 + .../libzfs_core/common/libzfs_core_compat.h | 47 + .../opensolaris/lib/libzpool/common/kernel.c | 11 + .../lib/libzpool/common/sys/zfs_context.h | 66 +- .../opensolaris/lib/libzpool/common/zfs.d | 36 + cddl/lib/Makefile | 2 + cddl/lib/libzfs/Makefile | 7 +- cddl/lib/libzfs_core/Makefile | 37 + cddl/sbin/zfs/Makefile | 5 +- cddl/sbin/zpool/Makefile | 4 +- cddl/usr.bin/zinject/Makefile | 4 +- cddl/usr.bin/ztest/Makefile | 5 +- cddl/usr.sbin/zdb/Makefile | 4 +- cddl/usr.sbin/zhack/Makefile | 4 +- rescue/rescue/Makefile | 2 +- share/mk/bsd.libnames.mk | 1 + sys/cddl/compat/opensolaris/sys/cred.h | 1 + sys/cddl/compat/opensolaris/sys/sdt.h | 4 +- .../opensolaris/common/nvpair/fnvpair.c | 14 + .../opensolaris/common/zfs/zfs_comutil.c | 7 +- .../opensolaris/common/zfs/zfs_comutil.h | 4 +- .../opensolaris/common/zfs/zfs_ioctl_compat.c | 350 +- .../opensolaris/common/zfs/zfs_ioctl_compat.h | 64 +- .../opensolaris/common/zfs/zprop_common.c | 6 +- .../opensolaris/uts/common/Makefile.files | 4 +- .../opensolaris/uts/common/fs/zfs/arc.c | 26 +- .../opensolaris/uts/common/fs/zfs/bplist.c | 8 + .../opensolaris/uts/common/fs/zfs/bpobj.c | 4 + .../opensolaris/uts/common/fs/zfs/dbuf.c | 97 +- .../opensolaris/uts/common/fs/zfs/ddt.c | 8 +- .../opensolaris/uts/common/fs/zfs/dmu.c | 33 +- .../opensolaris/uts/common/fs/zfs/dmu_diff.c | 94 +- .../uts/common/fs/zfs/dmu_object.c | 5 +- .../uts/common/fs/zfs/dmu_objset.c | 856 ++-- .../opensolaris/uts/common/fs/zfs/dmu_send.c | 1038 ++-- .../uts/common/fs/zfs/dmu_traverse.c | 46 +- .../opensolaris/uts/common/fs/zfs/dmu_tx.c | 43 +- .../opensolaris/uts/common/fs/zfs/dnode.c | 34 +- .../uts/common/fs/zfs/dnode_sync.c | 1 + .../uts/common/fs/zfs/dsl_dataset.c | 4252 ++++++----------- .../opensolaris/uts/common/fs/zfs/dsl_deleg.c | 184 +- .../uts/common/fs/zfs/dsl_destroy.c | 926 ++++ .../opensolaris/uts/common/fs/zfs/dsl_dir.c | 646 ++- .../opensolaris/uts/common/fs/zfs/dsl_pool.c | 224 +- .../opensolaris/uts/common/fs/zfs/dsl_prop.c | 512 +- .../opensolaris/uts/common/fs/zfs/dsl_scan.c | 130 +- .../uts/common/fs/zfs/dsl_synctask.c | 243 +- .../uts/common/fs/zfs/dsl_userhold.c | 536 +++ .../opensolaris/uts/common/fs/zfs/metaslab.c | 50 +- .../opensolaris/uts/common/fs/zfs/refcount.c | 21 +- .../opensolaris/uts/common/fs/zfs/rrwlock.c | 54 +- .../opensolaris/uts/common/fs/zfs/sa.c | 22 +- .../opensolaris/uts/common/fs/zfs/spa.c | 260 +- .../uts/common/fs/zfs/spa_config.c | 10 +- .../uts/common/fs/zfs/spa_errlog.c | 9 +- .../uts/common/fs/zfs/spa_history.c | 236 +- .../opensolaris/uts/common/fs/zfs/spa_misc.c | 27 +- .../opensolaris/uts/common/fs/zfs/space_map.c | 50 +- .../opensolaris/uts/common/fs/zfs/sys/arc.h | 2 +- .../opensolaris/uts/common/fs/zfs/sys/dbuf.h | 9 +- .../opensolaris/uts/common/fs/zfs/sys/dmu.h | 71 +- .../uts/common/fs/zfs/sys/dmu_objset.h | 22 +- .../uts/common/fs/zfs/sys/dmu_send.h | 77 + .../uts/common/fs/zfs/sys/dmu_tx.h | 6 +- .../uts/common/fs/zfs/sys/dsl_dataset.h | 121 +- .../uts/common/fs/zfs/sys/dsl_deleg.h | 5 +- .../uts/common/fs/zfs/sys/dsl_destroy.h | 52 + .../uts/common/fs/zfs/sys/dsl_dir.h | 19 +- .../uts/common/fs/zfs/sys/dsl_pool.h | 16 +- .../uts/common/fs/zfs/sys/dsl_prop.h | 56 +- .../uts/common/fs/zfs/sys/dsl_synctask.h | 46 +- .../uts/common/fs/zfs/sys/dsl_userhold.h | 57 + .../uts/common/fs/zfs/sys/metaslab.h | 3 +- .../uts/common/fs/zfs/sys/refcount.h | 5 +- .../uts/common/fs/zfs/sys/rrwlock.h | 13 +- .../opensolaris/uts/common/fs/zfs/sys/spa.h | 32 +- .../uts/common/fs/zfs/sys/space_map.h | 2 + .../opensolaris/uts/common/fs/zfs/sys/txg.h | 9 +- .../uts/common/fs/zfs/sys/vdev_impl.h | 3 +- .../uts/common/fs/zfs/sys/zfeature.h | 17 +- .../uts/common/fs/zfs/sys/zfs_debug.h | 12 +- .../uts/common/fs/zfs/sys/zfs_ioctl.h | 35 +- .../uts/common/fs/zfs/sys/zfs_znode.h | 3 +- .../opensolaris/uts/common/fs/zfs/sys/zil.h | 4 +- .../opensolaris/uts/common/fs/zfs/sys/zio.h | 2 +- .../opensolaris/uts/common/fs/zfs/txg.c | 37 +- .../opensolaris/uts/common/fs/zfs/uberblock.c | 3 +- .../opensolaris/uts/common/fs/zfs/vdev.c | 58 +- .../uts/common/fs/zfs/vdev_cache.c | 13 +- .../opensolaris/uts/common/fs/zfs/vdev_disk.c | 54 +- .../opensolaris/uts/common/fs/zfs/vdev_file.c | 16 +- .../uts/common/fs/zfs/vdev_label.c | 8 +- .../uts/common/fs/zfs/vdev_mirror.c | 10 +- .../uts/common/fs/zfs/vdev_missing.c | 4 +- .../uts/common/fs/zfs/vdev_queue.c | 12 +- .../uts/common/fs/zfs/vdev_raidz.c | 14 +- .../opensolaris/uts/common/fs/zfs/vdev_root.c | 4 +- .../opensolaris/uts/common/fs/zfs/zap.c | 18 +- .../opensolaris/uts/common/fs/zfs/zap_leaf.c | 13 +- .../opensolaris/uts/common/fs/zfs/zap_micro.c | 45 +- .../opensolaris/uts/common/fs/zfs/zfeature.c | 14 +- .../opensolaris/uts/common/fs/zfs/zfs_acl.c | 37 +- .../uts/common/fs/zfs/zfs_ctldir.c | 109 +- .../opensolaris/uts/common/fs/zfs/zfs_dir.c | 35 +- .../opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 2939 +++++++----- .../uts/common/fs/zfs/zfs_onexit.c | 9 +- .../uts/common/fs/zfs/zfs_replay.c | 9 +- .../uts/common/fs/zfs/zfs_vfsops.c | 179 +- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 193 +- .../opensolaris/uts/common/fs/zfs/zfs_znode.c | 20 +- .../opensolaris/uts/common/fs/zfs/zil.c | 147 +- .../opensolaris/uts/common/fs/zfs/zio.c | 20 +- .../uts/common/fs/zfs/zio_checksum.c | 9 +- .../uts/common/fs/zfs/zio_compress.c | 6 +- .../uts/common/fs/zfs/zio_inject.c | 10 +- .../opensolaris/uts/common/fs/zfs/zvol.c | 173 +- .../uts/common/sys/feature_tests.h | 4 +- .../opensolaris/uts/common/sys/fs/zfs.h | 208 +- .../opensolaris/uts/common/sys/nvpair.h | 1 + 137 files changed, 11049 insertions(+), 8054 deletions(-) create mode 100644 cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c create mode 100644 cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h create mode 100644 cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c create mode 100644 cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h create mode 100644 cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c create mode 100644 cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h create mode 100644 cddl/contrib/opensolaris/lib/libzpool/common/zfs.d create mode 100644 cddl/lib/libzfs_core/Makefile create mode 100644 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c create mode 100644 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c create mode 100644 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h create mode 100644 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_destroy.h create mode 100644 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h diff --git a/Makefile.inc1 b/Makefile.inc1 index 4efa7d4c6..f5ea58d7d 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1321,6 +1321,7 @@ _prebuild_libs= ${_kerberos5_lib_libasn1} ${_kerberos5_lib_libhdb} \ lib/libopie lib/libpam ${_lib_libthr} \ lib/libradius lib/libsbuf lib/libtacplus \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ + ${_cddl_lib_libzfs_core} \ lib/libutil ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssh} \ ${_secure_lib_libssl} @@ -1340,7 +1341,9 @@ lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair +_cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib= cddl/lib +cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L .endif .if ${MK_CRYPT} != "no" diff --git a/cddl/contrib/opensolaris/cmd/zdb/zdb.c b/cddl/contrib/opensolaris/cmd/zdb/zdb.c index dd36813d7..c265c99b4 100644 --- a/cddl/contrib/opensolaris/cmd/zdb/zdb.c +++ b/cddl/contrib/opensolaris/cmd/zdb/zdb.c @@ -21,10 +21,11 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include +#include #include #include #include @@ -57,6 +58,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #undef verify #include @@ -206,6 +208,27 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) nvlist_free(nv); } +/* ARGSUSED */ +static void +dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) +{ + spa_history_phys_t *shp = data; + + if (shp == NULL) + return; + + (void) printf("\t\tpool_create_len = %llu\n", + (u_longlong_t)shp->sh_pool_create_len); + (void) printf("\t\tphys_max_off = %llu\n", + (u_longlong_t)shp->sh_phys_max_off); + (void) printf("\t\tbof = %llu\n", + (u_longlong_t)shp->sh_bof); + (void) printf("\t\teof = %llu\n", + (u_longlong_t)shp->sh_eof); + (void) printf("\t\trecords_lost = %llu\n", + (u_longlong_t)shp->sh_records_lost); +} + static void zdb_nicenum(uint64_t num, char *buf) { @@ -215,18 +238,18 @@ zdb_nicenum(uint64_t num, char *buf) nicenum(num, buf); } -const char dump_zap_stars[] = "****************************************"; -const int dump_zap_width = sizeof (dump_zap_stars) - 1; +const char histo_stars[] = "****************************************"; +const int histo_width = sizeof (histo_stars) - 1; static void -dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) +dump_histogram(const uint64_t *histo, int size) { int i; - int minidx = ZAP_HISTOGRAM_SIZE - 1; + int minidx = size - 1; int maxidx = 0; uint64_t max = 0; - for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { + for (i = 0; i < size; i++) { if (histo[i] > max) max = histo[i]; if (histo[i] > 0 && i > maxidx) @@ -235,12 +258,14 @@ dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) minidx = i; } - if (max < dump_zap_width) - max = dump_zap_width; + if (max < histo_width) + max = histo_width; - for (i = minidx; i <= maxidx; i++) - (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], - &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); + for (i = minidx; i <= maxidx; i++) { + (void) printf("\t\t\t%3u: %6llu %s\n", + i, (u_longlong_t)histo[i], + &histo_stars[(max - histo[i]) * histo_width / max]); + } } static void @@ -291,19 +316,19 @@ dump_zap_stats(objset_t *os, uint64_t object) (u_longlong_t)zs.zs_salt); (void) printf("\t\tLeafs with 2^n pointers:\n"); - dump_zap_histogram(zs.zs_leafs_with_2n_pointers); + dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBlocks with n*5 entries:\n"); - dump_zap_histogram(zs.zs_blocks_with_n5_entries); + dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBlocks n/10 full:\n"); - dump_zap_histogram(zs.zs_blocks_n_tenths_full); + dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tEntries with n chunks:\n"); - dump_zap_histogram(zs.zs_entries_using_n_chunks); + dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBuckets with n entries:\n"); - dump_zap_histogram(zs.zs_buckets_with_n_entries); + dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE); } /*ARGSUSED*/ @@ -857,21 +882,22 @@ dump_history(spa_t *spa) for (int i = 0; i < num; i++) { uint64_t time, txg, ievent; char *cmd, *intstr; + boolean_t printed = B_FALSE; if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, &time) != 0) - continue; + goto next; if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, &cmd) != 0) { if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_INT_EVENT, &ievent) != 0) - continue; + goto next; verify(nvlist_lookup_uint64(events[i], ZPOOL_HIST_TXG, &txg) == 0); verify(nvlist_lookup_string(events[i], ZPOOL_HIST_INT_STR, &intstr) == 0); - if (ievent >= LOG_END) - continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) + goto next; (void) snprintf(internalstr, sizeof (internalstr), @@ -884,6 +910,14 @@ dump_history(spa_t *spa) (void) localtime_r(&tsec, &t); (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); (void) printf("%s %s\n", tbuf, cmd); + printed = B_TRUE; + +next: + if (dump_opt['h'] > 1) { + if (!printed) + (void) printf("unrecognized record:\n"); + dump_nvlist(events[i], 2); + } } } @@ -916,7 +950,7 @@ sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) const dva_t *dva = bp->blk_dva; int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; - if (dump_opt['b'] >= 5) { + if (dump_opt['b'] >= 6) { sprintf_blkptr(blkbuf, bp); return; } @@ -1496,7 +1530,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_zap, /* other ZAP */ dump_zap, /* persistent error log */ dump_uint8, /* SPA history */ - dump_uint64, /* SPA history offsets */ + dump_history_offsets, /* SPA history offsets */ dump_zap, /* Pool properties */ dump_zap, /* DSL permissions */ dump_acl, /* ZFS ACL */ @@ -1661,7 +1695,9 @@ dump_dir(objset_t *os) int print_header = 1; int i, error; + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (dds.dds_type < DMU_OST_NUMTYPES) type = objset_types[dds.dds_type]; @@ -1953,11 +1989,13 @@ dump_one_dir(const char *dsname, void *arg) /* * Block statistics. */ +#define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1) typedef struct zdb_blkstats { - uint64_t zb_asize; - uint64_t zb_lsize; - uint64_t zb_psize; - uint64_t zb_count; + uint64_t zb_asize; + uint64_t zb_lsize; + uint64_t zb_psize; + uint64_t zb_count; + uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; } zdb_blkstats_t; /* @@ -1981,6 +2019,9 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_start; + uint64_t zcb_lastprint; + uint64_t zcb_totalasize; uint64_t zcb_errors[256]; int zcb_readfails; int zcb_haderrors; @@ -2007,6 +2048,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; + zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++; } if (dump_opt['L']) @@ -2070,7 +2112,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb)); free(data); - if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) { zcb->zcb_haderrors = 1; zcb->zcb_errors[ioerr]++; @@ -2094,7 +2135,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zcb->zcb_readfails = 0; - if (dump_opt['b'] >= 4) { + if (dump_opt['b'] >= 5) { sprintf_blkptr(blkbuf, bp); (void) printf("objset %llu object %llu " "level %lld offset 0x%llx %s\n", @@ -2105,6 +2146,28 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, blkbuf); } + if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) && + gethrtime() > zcb->zcb_lastprint + NANOSEC) { + uint64_t now = gethrtime(); + char buf[10]; + uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; + int kb_per_sec = + 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); + int sec_remaining = + (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; + + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, + "\r%5s completed (%4dMB/s) " + "estimated time remaining: %uhr %02umin %02usec ", + buf, kb_per_sec / 1024, + sec_remaining / 60 / 60, + sec_remaining / 60 % 60, + sec_remaining % 60); + + zcb->zcb_lastprint = now; + } + return (0); } @@ -2236,7 +2299,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zdb_cb_t *zcb = arg; - if (dump_opt['b'] >= 4) { + if (dump_opt['b'] >= 5) { char blkbuf[BP_SPRINTF_LEN]; sprintf_blkptr(blkbuf, bp); (void) printf("[%s] %s\n", @@ -2255,7 +2318,7 @@ dump_block_stats(spa_t *spa) int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; int leaks = 0; - (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] == 1) ? "metadata " : "", dump_opt['c'] ? "checksums " : "", @@ -2291,6 +2354,8 @@ dump_block_stats(spa_t *spa) if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; + zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); + zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); if (zcb.zcb_haderrors) { @@ -2418,6 +2483,14 @@ dump_block_stats(spa_t *spa) else (void) printf(" L%d %s\n", level, typename); + + if (dump_opt['b'] >= 4) { + (void) printf("psize " + "(in 512-byte sectors): " + "number of blocks\n"); + dump_histogram(zb->zb_psize_histogram, + PSIZE_HISTO_SIZE); + } } } } diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 index 57fa27298..92619a9b7 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 1, 2013 +.Dd March 21, 2013 .Dt ZFS 8 .Os .Sh NAME @@ -65,6 +65,7 @@ .Op Fl r .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem@snapname Ns | Ns Ar volume@snapname +.Ar filesystem@snapname Ns | Ns Ar volume@snapname Ns ... .Nm .Cm rollback .Op Fl rRf @@ -1609,7 +1610,11 @@ multiple snapshots. Destroy (or mark for deferred deletion) all snapshots with this name in descendent file systems. .It Fl R -Recursively destroy all dependents. +Recursively destroy all clones of these snapshots, including the clones, +snapshots, and children. +If this flag is specified, the +.Op fl d +flag will have no effect. .It Fl n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in conjunction with the @@ -1637,17 +1642,18 @@ behavior for mounted file systems in use. .Op Fl r .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Ar filesystem@snapname Ns | Ns volume@snapname +.Ar filesystem@snapname Ns | Ns volume@snapname Ns ... .Xc .Pp -Creates a snapshot with the given name. All previous modifications by -successful system calls to the file system are part of the snapshot. See the +Creates snapshots with the given names. All previous modifications by +successful system calls to the file system are part of the snapshots. +Snapshots are taken atomically, so that all snapshots correspond to the same +moment in time. See the .Qq Sx Snapshots section for details. .Bl -tag -width indent .It Fl r -Recursively create snapshots of all descendent datasets. Snapshots are taken -atomically, so that all recursive snapshots correspond to the same moment in -time. +Recursively create snapshots of all descendent datasets .It Fl o Ar property Ns = Ns Ar value Sets the specified property; see .Qq Nm Cm create diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c index 6b8c13214..97ae0bdd5 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c @@ -58,6 +58,7 @@ #include #include +#include #include #include #include @@ -74,6 +75,7 @@ libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; +static boolean_t log_history = B_TRUE; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -276,7 +278,7 @@ get_usage(zfs_help_t idx) return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot [-r] [-o property=value] ... " - "\n")); + " ...\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-f] " "<-a | filesystem|mountpoint>\n")); @@ -903,11 +905,12 @@ typedef struct destroy_cbdata { boolean_t cb_parsable; boolean_t cb_dryrun; nvlist_t *cb_nvl; + nvlist_t *cb_batchedsnaps; /* first snap in contiguous run */ - zfs_handle_t *cb_firstsnap; + char *cb_firstsnap; /* previous snap in contiguous run */ - zfs_handle_t *cb_prevsnap; + char *cb_prevsnap; int64_t cb_snapused; char *cb_snapspec; } destroy_cbdata_t; @@ -999,9 +1002,27 @@ destroy_callback(zfs_handle_t *zhp, void *data) zfs_close(zhp); return (0); } + if (cb->cb_dryrun) { + zfs_close(zhp); + return (0); + } - if (!cb->cb_dryrun) { - if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || + /* + * We batch up all contiguous snapshots (even of different + * filesystems) and destroy them with one ioctl. We can't + * simply do all snap deletions and then all fs deletions, + * because we must delete a clone before its origin. + */ + if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { + fnvlist_add_boolean(cb->cb_batchedsnaps, name); + } else { + int error = zfs_destroy_snaps_nvl(g_zfs, + cb->cb_batchedsnaps, B_FALSE); + fnvlist_free(cb->cb_batchedsnaps); + cb->cb_batchedsnaps = fnvlist_alloc(); + + if (error != 0 || + zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { zfs_close(zhp); return (-1); @@ -1021,11 +1042,13 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg) if (nvlist_exists(cb->cb_nvl, name)) { if (cb->cb_firstsnap == NULL) - cb->cb_firstsnap = zfs_handle_dup(zhp); + cb->cb_firstsnap = strdup(name); if (cb->cb_prevsnap != NULL) - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); /* this snap continues the current range */ - cb->cb_prevsnap = zfs_handle_dup(zhp); + cb->cb_prevsnap = strdup(name); + if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) + nomem(); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); @@ -1040,12 +1063,12 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg) } else if (cb->cb_firstsnap != NULL) { /* end of this range */ uint64_t used = 0; - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } zfs_close(zhp); @@ -1062,13 +1085,13 @@ destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); } cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } return (err); @@ -1155,8 +1178,10 @@ static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; + int rv = 0; + int err = 0; int c; - zfs_handle_t *zhp; + zfs_handle_t *zhp = NULL; char *at; zfs_type_t type = ZFS_TYPE_DATASET; @@ -1210,11 +1235,9 @@ zfs_do_destroy(int argc, char **argv) at = strchr(argv[0], '@'); if (at != NULL) { - int err = 0; /* Build the list of snaps to destroy in cb_nvl. */ - if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); + cb.cb_nvl = fnvlist_alloc(); *at = '\0'; zhp = zfs_open(g_zfs, argv[0], @@ -1225,17 +1248,15 @@ zfs_do_destroy(int argc, char **argv) cb.cb_snapspec = at + 1; if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || cb.cb_error) { - zfs_close(zhp); - nvlist_free(cb.cb_nvl); - return (1); + rv = 1; + goto out; } if (nvlist_empty(cb.cb_nvl)) { (void) fprintf(stderr, gettext("could not find any " "snapshots to destroy; check snapshot names.\n")); - zfs_close(zhp); - nvlist_free(cb.cb_nvl); - return (1); + rv = 1; + goto out; } if (cb.cb_verbose) { @@ -1254,18 +1275,26 @@ zfs_do_destroy(int argc, char **argv) } if (!cb.cb_dryrun) { - if (cb.cb_doclones) + if (cb.cb_doclones) { + cb.cb_batchedsnaps = fnvlist_alloc(); err = destroy_clones(&cb); + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, B_FALSE); + } + if (err != 0) { + rv = 1; + goto out; + } + } if (err == 0) { - err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl, + err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, cb.cb_defer_destroy); } } - zfs_close(zhp); - nvlist_free(cb.cb_nvl); if (err != 0) - return (1); + rv = 1; } else { /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) @@ -1286,8 +1315,8 @@ zfs_do_destroy(int argc, char **argv) zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zpool destroy %s' " "to destroy the pool itself\n"), zfs_get_name(zhp)); - zfs_close(zhp); - return (1); + rv = 1; + goto out; } /* @@ -1297,30 +1326,42 @@ zfs_do_destroy(int argc, char **argv) if (!cb.cb_doclones && zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, &cb) != 0) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } if (cb.cb_error) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } + cb.cb_batchedsnaps = fnvlist_alloc(); if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } /* * Do the real thing. The callback will close the * handle regardless of whether it succeeds or not. */ - if (destroy_callback(zhp, &cb) != 0) - return (1); + err = destroy_callback(zhp, &cb); + zhp = NULL; + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, cb.cb_defer_destroy); + } + if (err != 0) + rv = 1; } - return (0); +out: + fnvlist_free(cb.cb_batchedsnaps); + fnvlist_free(cb.cb_nvl); + if (zhp != NULL) + zfs_close(zhp); + return (rv); } static boolean_t @@ -1921,9 +1962,11 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data) /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need - * to log this history once to each pool. + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). */ - verify(zpool_stage_history(g_zfs, history_str) == 0); + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; @@ -3461,6 +3504,32 @@ zfs_do_set(int argc, char **argv) return (ret); } +typedef struct snap_cbdata { + nvlist_t *sd_nvl; + boolean_t sd_recursive; + const char *sd_snapname; +} snap_cbdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snap_cbdata_t *sd = arg; + char *name; + int rv = 0; + int error; + + error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + if (error == -1) + nomem(); + fnvlist_add_boolean(sd->sd_nvl, name); + free(name); + + if (sd->sd_recursive) + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* * zfs snapshot [-r] [-o prop=value] ... * @@ -3470,13 +3539,16 @@ zfs_do_set(int argc, char **argv) static int zfs_do_snapshot(int argc, char **argv) { - boolean_t recursive = B_FALSE; int ret = 0; char c; nvlist_t *props; + snap_cbdata_t sd = { 0 }; + boolean_t multiple_snaps = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); + if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { @@ -3486,7 +3558,8 @@ zfs_do_snapshot(int argc, char **argv) return (1); break; case 'r': - recursive = B_TRUE; + sd.sd_recursive = B_TRUE; + multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -3503,18 +3576,35 @@ zfs_do_snapshot(int argc, char **argv) (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto usage; + + if (argc > 1) + multiple_snaps = B_TRUE; + for (; argc > 0; argc--, argv++) { + char *atp; + zfs_handle_t *zhp; + + atp = strchr(argv[0], '@'); + if (atp == NULL) + goto usage; + *atp = '\0'; + sd.sd_snapname = atp + 1; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + goto usage; + if (zfs_snapshot_cb(zhp, &sd) != 0) + goto usage; } - ret = zfs_snapshot(g_zfs, argv[0], recursive, props); + ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); nvlist_free(props); - if (ret && recursive) + if (ret != 0 && multiple_snaps) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: + nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); @@ -5057,28 +5147,12 @@ cleanup2: return (error); } -/* - * zfs allow [-r] [-t] ... - * - * -r Recursively hold - * -t Temporary hold (hidden option) - * - * Apply a user-hold with the given tag to the list of snapshots. - */ static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } -/* - * zfs unallow [-r] [-t] ... - * - * -r Recursively hold - * -t Temporary hold (hidden option) - * - * Apply a user-hold with the given tag to the list of snapshots. - */ static int zfs_do_unallow(int argc, char **argv) { @@ -5092,7 +5166,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) int i; const char *tag; boolean_t recursive = B_FALSE; - boolean_t temphold = B_FALSE; const char *opts = holding ? "rt" : "r"; int c; @@ -5102,9 +5175,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) case 'r': recursive = B_TRUE; break; - case 't': - temphold = B_TRUE; - break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -5153,7 +5223,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, - temphold, B_FALSE, -1, 0, 0) != 0) + B_FALSE, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) @@ -5169,7 +5239,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) * zfs hold [-r] [-t] ... * * -r Recursively hold - * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ @@ -6591,8 +6660,7 @@ main(int argc, char **argv) return (1); } - zpool_set_history_str("zfs", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); @@ -6661,6 +6729,9 @@ main(int argc, char **argv) (void) fclose(mnttab_file); + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* diff --git a/cddl/contrib/opensolaris/cmd/zhack/zhack.c b/cddl/contrib/opensolaris/cmd/zhack/zhack.c index 2618cea32..d80b3a013 100644 --- a/cddl/contrib/opensolaris/cmd/zhack/zhack.c +++ b/cddl/contrib/opensolaris/cmd/zhack/zhack.c @@ -46,6 +46,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #undef verify #include @@ -273,12 +274,15 @@ zhack_do_feature_stat(int argc, char **argv) } static void -feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_enable_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "zhack enable feature", tx, + "name=%s can_readonly=%u", + feature->fi_guid, feature->fi_can_readonly); } static void @@ -341,8 +345,8 @@ zhack_do_feature_enable(int argc, char **argv) if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) fatal("feature already enabled: %s", feature.fi_guid); - VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, - feature_enable_sync, spa, &feature, 5)); + VERIFY0(dsl_sync_task(spa_name(spa), NULL, + feature_enable_sync, &feature, 5)); spa_close(spa, FTAG); @@ -350,21 +354,25 @@ zhack_do_feature_enable(int argc, char **argv) } static void -feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_incr_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_incr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature incr", tx, + "name=%s", feature->fi_guid); } static void -feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_decr_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_decr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature decr", tx, + "name=%s", feature->fi_guid); } static void @@ -435,8 +443,8 @@ zhack_do_feature_ref(int argc, char **argv) if (decr && !spa_feature_is_active(spa, &feature)) fatal("feature refcount already 0: %s", feature.fi_guid); - VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, - decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5)); + VERIFY0(dsl_sync_task(spa_name(spa), NULL, + decr ? feature_decr_sync : feature_incr_sync, &feature, 5)); spa_close(spa, FTAG); } diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c index 2effce4b5..3d1f804fc 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c @@ -192,9 +192,9 @@ static zpool_command_t command_table[] = { #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) -zpool_command_t *current_command; +static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; - +static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; static const char * @@ -1092,7 +1092,10 @@ zpool_do_destroy(int argc, char **argv) return (1); } - ret = (zpool_destroy(zhp) != 0); + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + ret = (zpool_destroy(zhp, history_str) != 0); zpool_close(zhp); @@ -1156,10 +1159,13 @@ zpool_do_export(int argc, char **argv) continue; } + /* The history must be logged as part of the export */ + log_history = B_FALSE; + if (hardforce) { - if (zpool_export_force(zhp) != 0) + if (zpool_export_force(zhp, history_str) != 0) ret = 1; - } else if (zpool_export(zhp, force) != 0) { + } else if (zpool_export(zhp, force, history_str) != 0) { ret = 1; } @@ -4562,6 +4568,14 @@ upgrade_cb(zpool_handle_t *zhp, void *arg) if (count > 0) { cbp->cb_first = B_FALSE; printnl = B_TRUE; + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). + */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } } @@ -4923,8 +4937,8 @@ zpool_do_upgrade(int argc, char **argv) typedef struct hist_cbdata { boolean_t first; - int longfmt; - int internal; + boolean_t longfmt; + boolean_t internal; } hist_cbdata_t; /* @@ -4936,21 +4950,8 @@ get_history_one(zpool_handle_t *zhp, void *data) nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - char *cmdstr; - char *pathstr; - uint64_t dst_time; - time_t tsec; - struct tm t; - char tbuf[30]; int ret, i; - uint64_t who; - struct passwd *pwd; - char *hostname; - char *zonename; - char internalstr[MAXPATHLEN]; hist_cbdata_t *cb = (hist_cbdata_t *)data; - uint64_t txg; - uint64_t ievent; cb->first = B_FALSE; @@ -4962,64 +4963,94 @@ get_history_one(zpool_handle_t *zhp, void *data) verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); for (i = 0; i < numrecords; i++) { - if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME, - &dst_time) != 0) - continue; + nvlist_t *rec = records[i]; + char tbuf[30] = ""; - /* is it an internal event or a standard event? */ - if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD, - &cmdstr) != 0) { - if (cb->internal == 0) - continue; + if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { + time_t tsec; + struct tm t; + + tsec = fnvlist_lookup_uint64(records[i], + ZPOOL_HIST_TIME); + (void) localtime_r(&tsec, &t); + (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); + } - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_INT_EVENT, &ievent) != 0) + if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { + (void) printf("%s %s", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { + int ievent = + fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); + if (!cb->internal) + continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { + (void) printf("%s unrecognized record:\n", + tbuf); + dump_nvlist(rec, 4); + continue; + } + (void) printf("%s [internal %s txg:%lld] %s", tbuf, + zfs_history_event_names[ievent], + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { + if (!cb->internal) continue; - verify(nvlist_lookup_uint64(records[i], - ZPOOL_HIST_TXG, &txg) == 0); - verify(nvlist_lookup_string(records[i], - ZPOOL_HIST_INT_STR, &pathstr) == 0); - if (ievent >= LOG_END) + (void) printf("%s [txg:%lld] %s", tbuf, + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); + if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { + (void) printf(" %s (%llu)", + fnvlist_lookup_string(rec, + ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(rec, + ZPOOL_HIST_DSID)); + } + (void) printf(" %s", fnvlist_lookup_string(rec, + ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { + if (!cb->internal) continue; - (void) snprintf(internalstr, - sizeof (internalstr), - "[internal %s txg:%lld] %s", - zfs_history_event_names[ievent], txg, - pathstr); - cmdstr = internalstr; + (void) printf("%s ioctl %s\n", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); + if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { + (void) printf(" input:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_INPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { + (void) printf(" output:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_OUTPUT_NVL), 8); + } + } else { + if (!cb->internal) + continue; + (void) printf("%s unrecognized record:\n", tbuf); + dump_nvlist(rec, 4); } - tsec = dst_time; - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - (void) printf("%s %s", tbuf, cmdstr); if (!cb->longfmt) { (void) printf("\n"); continue; } (void) printf(" ["); - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_WHO, &who) == 0) { - pwd = getpwuid((uid_t)who); - if (pwd) - (void) printf("user %s on", - pwd->pw_name); - else - (void) printf("user %d on", - (int)who); - } else { - (void) printf(gettext("no info]\n")); - continue; + if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { + uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); + struct passwd *pwd = getpwuid(who); + (void) printf("user %d ", (int)who); + if (pwd != NULL) + (void) printf("(%s) ", pwd->pw_name); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_HOST, &hostname) == 0) { - (void) printf(" %s", hostname); + if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { + (void) printf("on %s", + fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_ZONE, &zonename) == 0) { - (void) printf(":%s", zonename); + if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { + (void) printf(":%s", + fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); } - (void) printf("]"); (void) printf("\n"); } @@ -5034,8 +5065,6 @@ get_history_one(zpool_handle_t *zhp, void *data) * * Displays the history of commands that modified pools. */ - - int zpool_do_history(int argc, char **argv) { @@ -5048,10 +5077,10 @@ zpool_do_history(int argc, char **argv) while ((c = getopt(argc, argv, "li")) != -1) { switch (c) { case 'l': - cbdata.longfmt = 1; + cbdata.longfmt = B_TRUE; break; case 'i': - cbdata.internal = 1; + cbdata.internal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -5276,8 +5305,7 @@ main(int argc, char **argv) if (strcmp(cmdname, "-?") == 0) usage(B_TRUE); - zpool_set_history_str("zpool", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); /* * Run the appropriate command. @@ -5304,6 +5332,9 @@ main(int argc, char **argv) usage(B_FALSE); } + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* diff --git a/cddl/contrib/opensolaris/cmd/ztest/ztest.c b/cddl/contrib/opensolaris/cmd/ztest/ztest.c index 9c38b3f34..ef776a4c7 100644 --- a/cddl/contrib/opensolaris/cmd/ztest/ztest.c +++ b/cddl/contrib/opensolaris/cmd/ztest/ztest.c @@ -104,10 +104,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -367,7 +369,7 @@ ztest_info_t ztest_info[] = { { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, - { ztest_vdev_attach_detach, 1, &zopt_rarely }, + { ztest_vdev_attach_detach, 1, &zopt_sometimes }, { ztest_vdev_LUN_growth, 1, &zopt_rarely }, { ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime }, @@ -1008,9 +1010,8 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, uint64_t curval; int error; - error = dsl_prop_set(osname, propname, - (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), - sizeof (value), 1, &value); + error = dsl_prop_set_int(osname, propname, + (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -1018,8 +1019,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, } ASSERT0(error); - VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval), - 1, &curval, setpoint), ==, 0); + VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); if (ztest_opts.zo_verbose >= 6) { VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); @@ -2332,7 +2332,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL)); nvlist_free(nvroot); /* @@ -2340,7 +2340,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); nvlist_free(nvroot); /* @@ -2349,7 +2349,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ (void) rw_rdlock(&ztest_name_lock); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); + VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -2407,7 +2407,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); + VERIFY0(spa_create(name, nvroot, props, NULL)); fnvlist_free(nvroot); fnvlist_free(props); @@ -2481,8 +2481,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) int error; VERIFY(mutex_lock(&ztest_vdev_lock) == 0); - leaves = - MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; + leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -3182,7 +3181,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) /* * Verify that the dataset contains a directory object. */ - VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ @@ -3190,12 +3189,16 @@ ztest_objset_destroy_cb(const char *name, void *arg) ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); /* * Destroy the dataset. */ - VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE)); + if (strchr(name, '@') != NULL) { + VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); + } else { + VERIFY0(dsl_destroy_head(name)); + } return (0); } @@ -3205,17 +3208,17 @@ ztest_snapshot_create(char *osname, uint64_t id) char snapname[MAXNAMELEN]; int error; - (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, - (u_longlong_t)id); + (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); - error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (B_FALSE); } - if (error != 0 && error != EEXIST) - fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error); + if (error != 0 && error != EEXIST) { + fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, + snapname, error); + } return (B_TRUE); } @@ -3228,7 +3231,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id) (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, (u_longlong_t)id); - error = dmu_objset_destroy(snapname, B_FALSE); + error = dsl_destroy_snapshot(snapname, B_FALSE); if (error != 0 && error != ENOENT) fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); return (B_TRUE); @@ -3274,7 +3277,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. */ - VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os)); + VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + FTAG, &os)); /* * Verify that we can create a new dataset. @@ -3289,8 +3293,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", name, error); } - VERIFY3U(0, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); ztest_zd_init(&zdtmp, NULL, os); @@ -3366,21 +3369,21 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); - error = dmu_objset_destroy(clone2name, B_FALSE); + error = dsl_destroy_head(clone2name); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); - error = dmu_objset_destroy(snap3name, B_FALSE); + fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); + error = dsl_destroy_snapshot(snap3name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); - error = dmu_objset_destroy(snap2name, B_FALSE); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); + error = dsl_destroy_snapshot(snap2name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); - error = dmu_objset_destroy(clone1name, B_FALSE); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); + error = dsl_destroy_head(clone1name); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error); - error = dmu_objset_destroy(snap1name, B_FALSE); + fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); + error = dsl_destroy_snapshot(snap1name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); } /* @@ -3389,8 +3392,7 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) void ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) { - objset_t *clone; - dsl_dataset_t *ds; + objset_t *os; char snap1name[MAXNAMELEN]; char clone1name[MAXNAMELEN]; char snap2name[MAXNAMELEN]; @@ -3409,8 +3411,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); - error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3419,12 +3420,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); } - error = dmu_objset_hold(snap1name, FTAG, &clone); - if (error) - fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); - - error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); + error = dmu_objset_clone(clone1name, snap1name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3433,8 +3429,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3443,8 +3438,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3453,12 +3447,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); } - error = dmu_objset_hold(snap3name, FTAG, &clone); - if (error) - fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); - - error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); + error = dmu_objset_clone(clone2name, snap3name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3467,14 +3456,14 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds); + error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); if (error) - fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error); + fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); - dsl_dataset_disown(ds, FTAG); + dmu_objset_disown(os, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); @@ -4286,7 +4275,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id) } count = -1ULL; - VERIFY(zap_count(os, object, &count) == 0); + VERIFY0(zap_count(os, object, &count)); ASSERT(count != -1ULL); /* @@ -4597,6 +4586,22 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) (void) rw_unlock(&ztest_name_lock); } +static int +user_release_one(const char *snapname, const char *holdname) +{ + nvlist_t *snaps, *holds; + int error; + + snaps = fnvlist_alloc(); + holds = fnvlist_alloc(); + fnvlist_add_boolean(holds, holdname); + fnvlist_add_nvlist(snaps, snapname, holds); + fnvlist_free(holds); + error = dsl_dataset_user_release(snaps, NULL); + fnvlist_free(snaps); + return (error); +} + /* * Test snapshot hold/release and deferred destroy. */ @@ -4611,29 +4616,36 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char clonename[100]; char tag[100]; char osname[MAXNAMELEN]; + nvlist_t *holds; (void) rw_rdlock(&ztest_name_lock); dmu_objset_name(os, osname); - (void) snprintf(snapname, 100, "sh1_%llu", id); - (void) snprintf(fullname, 100, "%s@%s", osname, snapname); - (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); - (void) snprintf(tag, 100, "%tag_%llu", id); + (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); + (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); + (void) snprintf(clonename, sizeof (clonename), + "%s/ch1_%llu", osname, id); + (void) snprintf(tag, sizeof (tag), "tag_%llu", id); /* * Clean up from any previous run. */ - (void) dmu_objset_destroy(clonename, B_FALSE); - (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE); - (void) dmu_objset_destroy(fullname, B_FALSE); + error = dsl_destroy_head(clonename); + if (error != ENOENT) + ASSERT0(error); + error = user_release_one(fullname, tag); + if (error != ESRCH && error != ENOENT) + ASSERT0(error); + error = dsl_destroy_snapshot(fullname, B_FALSE); + if (error != ENOENT) + ASSERT0(error); /* * Create snapshot, clone it, mark snap for deferred destroy, * destroy clone, verify snap was also destroyed. */ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); @@ -4642,12 +4654,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dmu_objset_hold(fullname, FTAG, &origin); - if (error) - fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); - - error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0); - dmu_objset_rele(origin, FTAG); + error = dmu_objset_clone(clonename, fullname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_clone"); @@ -4656,15 +4663,15 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); } - error = dmu_objset_destroy(fullname, B_TRUE); + error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { - fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } - error = dmu_objset_destroy(clonename, B_FALSE); + error = dsl_destroy_head(clonename); if (error) - fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error); + fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); error = dmu_objset_hold(fullname, FTAG, &origin); if (error != ENOENT) @@ -4675,8 +4682,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) * destroy a held snapshot, mark for deferred destroy, * release hold, verify snapshot was destroyed. */ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); @@ -4685,28 +4691,31 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, - B_TRUE, -1); + holds = fnvlist_alloc(); + fnvlist_add_string(holds, fullname, tag); + error = dsl_dataset_user_hold(holds, 0, NULL); + fnvlist_free(holds); + if (error) fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); - error = dmu_objset_destroy(fullname, B_FALSE); + error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != EBUSY) { - fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", fullname, error); } - error = dmu_objset_destroy(fullname, B_TRUE); + error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { - fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } - error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE); + error = user_release_one(fullname, tag); if (error) - fatal(0, "dsl_dataset_user_release(%s)", fullname, tag); + fatal(0, "user_release_one(%s)", fullname, tag); - VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); + VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); out: (void) rw_unlock(&ztest_name_lock); @@ -4960,8 +4969,12 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) */ for (int i = 0; i < copies; i++) { uint64_t offset = i * blocksize; - VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db, - DMU_READ_NO_PREFETCH)); + int error = dmu_buf_hold(os, object, offset, FTAG, &db, + DMU_READ_NO_PREFETCH); + if (error != 0) { + fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", + os, (long long)object, (long long) offset, error); + } ASSERT(db->db_offset == offset); ASSERT(db->db_size == blocksize); ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || @@ -5172,6 +5185,7 @@ ztest_spa_import_export(char *oldname, char *newname) nvlist_t *config, *newconfig; uint64_t pool_guid; spa_t *spa; + int error; if (ztest_opts.zo_verbose >= 4) { (void) printf("import/export: old = %s, new = %s\n", @@ -5216,7 +5230,12 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Import it under the new name. */ - VERIFY3U(0, ==, spa_import(newname, config, NULL, 0)); + error = spa_import(newname, config, NULL, 0); + if (error != 0) { + dump_nvlist(config, 0); + fatal(B_FALSE, "couldn't import pool %s as %s: error %u", + oldname, newname, error); + } ztest_walk_pool_directory("pools after import"); @@ -5423,7 +5442,7 @@ ztest_dataset_open(int d) } ASSERT(error == 0 || error == EEXIST); - VERIFY0(dmu_objset_hold(name, zd, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); (void) rw_unlock(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -5464,7 +5483,7 @@ ztest_dataset_close(int d) ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); - dmu_objset_rele(zd->zd_os, zd); + dmu_objset_disown(zd->zd_os, zd); ztest_zd_fini(zd); } @@ -5508,13 +5527,14 @@ ztest_run(ztest_shared_t *zs) * Open our pool. */ kernel_init(FREAD | FWRITE); - VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0); + VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); spa->spa_debug = B_TRUE; ztest_spa = spa; - VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os)); + VERIFY0(dmu_objset_own(ztest_opts.zo_pool, + DMU_OST_ANY, B_TRUE, FTAG, &os)); zs->zs_guid = dmu_objset_fsid_guid(os); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; @@ -5791,8 +5811,7 @@ ztest_init(ztest_shared_t *zs) spa_feature_table[i].fi_uname); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, - NULL, NULL)); + VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index 2660059f7..274b47b55 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -57,7 +57,8 @@ extern "C" { /* * libzfs errors */ -enum { +typedef enum zfs_error { + EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ @@ -129,7 +130,7 @@ enum { EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_UNKNOWN -}; +} zfs_error_t; /* * The following data structures are all part @@ -185,6 +186,9 @@ extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); +extern void zfs_save_arguments(int argc, char **, char *, int); +extern int zpool_log_history(libzfs_handle_t *, const char *); + extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); @@ -220,7 +224,7 @@ extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); -extern int zpool_destroy(zpool_handle_t *); +extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { @@ -343,8 +347,8 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ -extern int zpool_export(zpool_handle_t *, boolean_t); -extern int zpool_export_force(zpool_handle_t *); +extern int zpool_export(zpool_handle_t *, boolean_t, const char *); +extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, @@ -378,7 +382,7 @@ extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, */ struct zfs_cmd; -extern const char *zfs_history_event_names[LOG_END]; +extern const char *zfs_history_event_names[]; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, boolean_t verbose); @@ -386,12 +390,9 @@ extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); -extern void zpool_set_history_str(const char *subcommand, int argc, - char **argv, char *history_str); -extern int zpool_stage_history(libzfs_handle_t *, const char *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); -extern int zfs_ioctl(libzfs_handle_t *, unsigned long, struct zfs_cmd *); +extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); @@ -441,8 +442,6 @@ extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); -extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); @@ -555,9 +554,11 @@ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); -extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); +extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); +extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, + nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); typedef struct renameflags { @@ -609,8 +610,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_promote(zfs_handle_t *); -extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, - boolean_t, boolean_t, int, uint64_t, uint64_t); +extern int zfs_hold(zfs_handle_t *, const char *, const char *, + boolean_t, boolean_t, int); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c new file mode 100644 index 000000000..3c8119de5 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c @@ -0,0 +1,105 @@ +/* + * CDDL HEADER SART + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 Martin Matuska . All rights reserved. + */ + +#include "libzfs_compat.h" + +int zfs_ioctl_version = ZFS_IOCVER_UNDEF; +static int zfs_spa_version = -1; + +/* + * Get zfs_ioctl_version + */ +int +get_zfs_ioctl_version(void) +{ + size_t ver_size; + int ver = ZFS_IOCVER_NONE; + + ver_size = sizeof(ver); + sysctlbyname("vfs.zfs.version.ioctl", &ver, &ver_size, NULL, 0); + + return (ver); +} + +/* + * Get the SPA version + */ +static int +get_zfs_spa_version(void) +{ + size_t ver_size; + int ver = 0; + + ver_size = sizeof(ver); + sysctlbyname("vfs.zfs.version.spa", &ver, &ver_size, NULL, 0); + + return (ver); +} + +/* + * This is FreeBSD version of ioctl, because Solaris' ioctl() updates + * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an + * error is returned zc_nvlist_dst_size won't be updated. + */ +int +zcmd_ioctl(int fd, int request, zfs_cmd_t *zc) +{ + size_t oldsize; + int ret, cflag = ZFS_CMD_COMPAT_NONE; + + if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) + zfs_ioctl_version = get_zfs_ioctl_version(); + + if (zfs_ioctl_version == ZFS_IOCVER_LZC) + cflag = ZFS_CMD_COMPAT_LZC; + else if (zfs_ioctl_version == ZFS_IOCVER_DEADMAN) + cflag = ZFS_CMD_COMPAT_DEADMAN; + + /* + * If vfs.zfs.version.ioctl is not defined, assume we have v28 + * compatible binaries and use vfs.zfs.version.spa to test for v15 + */ + if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) { + cflag = ZFS_CMD_COMPAT_V28; + + if (zfs_spa_version < 0) + zfs_spa_version = get_zfs_spa_version(); + + if (zfs_spa_version == SPA_VERSION_15 || + zfs_spa_version == SPA_VERSION_14 || + zfs_spa_version == SPA_VERSION_13) + cflag = ZFS_CMD_COMPAT_V15; + } + + oldsize = zc->zc_nvlist_dst_size; + ret = zcmd_ioctl_compat(fd, request, zc, cflag); + + if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) { + ret = -1; + errno = ENOMEM; + } + + return (ret); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h new file mode 100644 index 000000000..376166833 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER SART + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 Martin Matuska . All rights reserved. + */ + +#ifndef _LIBZFS_COMPAT_H +#define _LIBZFS_COMPAT_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int get_zfs_ioctl_version(void); +int zcmd_ioctl(int fd, int request, zfs_cmd_t *zc); + +#define ioctl(fd, ioc, zc) zcmd_ioctl((fd), (ioc), (zc)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_COMPAT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c index 1696cb184..e78bb82c6 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c @@ -1447,7 +1447,6 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) nvlist_t *nvl = NULL, *realprops; zfs_prop_t prop; boolean_t do_prefix = B_TRUE; - uint64_t idx; int added_resv; (void) snprintf(errbuf, sizeof (errbuf), @@ -2017,10 +2016,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg) NULL, NULL, 0, B_TRUE) != 0) goto out; if (strcmp(gca->buf, gca->origin) == 0) { - if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) { - zfs_close(zhp); - return (no_memory(zhp->zfs_hdl)); - } + fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); gca->numclones--; } @@ -2711,25 +2707,6 @@ zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, return (0); } -int -zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp) -{ - int err; - zfs_cmd_t zc = { 0 }; - - (void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - - err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); - if (err) - return (err); - - *usedp = zc.zc_cookie; - - return (0); -} - /* * Returns the name of the given zfs handle. */ @@ -2930,7 +2907,6 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) */ for (cp = target + prefixlen + 1; cp = strchr(cp, '/'); *cp = '/', cp++) { - char *logstr; *cp = '\0'; @@ -2941,16 +2917,12 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) continue; } - logstr = hdl->libzfs_log_str; - hdl->libzfs_log_str = NULL; if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { - hdl->libzfs_log_str = logstr; opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } - hdl->libzfs_log_str = logstr; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); @@ -3008,12 +2980,12 @@ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { - zfs_cmd_t zc = { 0 }; int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); char errbuf[1024]; uint64_t zoned; + dmu_objset_type_t ost; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); @@ -3033,17 +3005,16 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. */ - (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) - zc.zc_objset_type = DMU_OST_ZVOL; + ost = DMU_OST_ZVOL; else - zc.zc_objset_type = DMU_OST_ZFS; + ost = DMU_OST_ZFS; if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, errbuf)) == 0) @@ -3095,14 +3066,9 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, } } - if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) - return (-1); - nvlist_free(props); - /* create the dataset */ - ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); - - zcmd_free_nvlists(&zc); + ret = lzc_create(path, ost, props); + nvlist_free(props); /* check for failure */ if (ret != 0) { @@ -3228,47 +3194,53 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); } else { - ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer); + ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer); } nvlist_free(dd.nvl); return (ret); } /* - * Destroys all the snapshots named in the nvlist. They must be underneath - * the zhp (either snapshots of it, or snapshots of its descendants). + * Destroys all the snapshots named in the nvlist. */ int -zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) +zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) { int ret; - zfs_cmd_t zc = { 0 }; + nvlist_t *errlist; - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) - return (-1); - zc.zc_defer_destroy = defer; + ret = lzc_destroy_snaps(snaps, defer, &errlist); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc); - if (ret != 0) { + if (ret == 0) + return (0); + + if (nvlist_next_nvpair(errlist, NULL) == NULL) { char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot destroy snapshots in %s"), zc.zc_name); + ret = zfs_standard_error(hdl, ret, errbuf); + } + for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL); + pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), + nvpair_name(pair)); - switch (errno) { + switch (fnvpair_value_int32(pair)) { case EEXIST: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "snapshot is cloned")); - return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); - + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "snapshot is cloned")); + ret = zfs_error(hdl, EZFS_EXISTS, errbuf); + break; default: - return (zfs_standard_error(zhp->zfs_hdl, errno, - errbuf)); + ret = zfs_standard_error(hdl, errno, errbuf); + break; } } - return (0); + return (ret); } /* @@ -3277,12 +3249,10 @@ zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { - zfs_cmd_t zc = { 0 }; char parent[ZFS_MAXNAMELEN]; int ret; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_type_t type; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); @@ -3301,32 +3271,21 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ - if (ZFS_IS_VOLUME(zhp)) { - zc.zc_objset_type = DMU_OST_ZVOL; - type = ZFS_TYPE_VOLUME; - } else { - zc.zc_objset_type = DMU_OST_ZFS; - type = ZFS_TYPE_FILESYSTEM; - } if (props) { + zfs_type_t type; + if (ZFS_IS_VOLUME(zhp)) { + type = ZFS_TYPE_VOLUME; + } else { + type = ZFS_TYPE_FILESYSTEM; + } if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, errbuf)) == NULL) return (-1); - - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); - } - - nvlist_free(props); } - (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc); - - zcmd_free_nvlists(&zc); + ret = lzc_clone(target, zhp->zfs_name, props); + nvlist_free(props); if (ret != 0) { switch (errno) { @@ -3411,74 +3370,134 @@ zfs_promote(zfs_handle_t *zhp) return (ret); } +typedef struct snapdata { + nvlist_t *sd_nvl; + const char *sd_snapname; +} snapdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snapdata_t *sd = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + + fnvlist_add_boolean(sd->sd_nvl, name); + + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* - * Takes a snapshot of the given dataset. + * Creates snapshots. The keys in the snaps nvlist are the snapshots to be + * created. */ int -zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, - nvlist_t *props) +zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) { - const char *delim; - char parent[ZFS_MAXNAMELEN]; - zfs_handle_t *zhp; - zfs_cmd_t zc = { 0 }; int ret; char errbuf[1024]; + nvpair_t *elem; + nvlist_t *errors; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot snapshot '%s'"), path); + "cannot create snapshots ")); - /* validate the target name */ - if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + elem = NULL; + while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { + const char *snapname = nvpair_name(elem); - if (props) { - if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, errbuf)) == NULL) - return (-1); + /* validate the target name */ + if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, + B_TRUE)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), snapname); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + } - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); + if (props != NULL && + (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, + props, B_FALSE, NULL, errbuf)) == NULL) { + return (-1); + } + + ret = lzc_snapshot(snaps, props, &errors); + + if (ret != 0) { + boolean_t printed = B_FALSE; + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), nvpair_name(elem)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); + printed = B_TRUE; } + if (!printed) { + switch (ret) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple snapshots of same " + "fs not allowed")); + (void) zfs_error(hdl, EZFS_EXISTS, errbuf); - nvlist_free(props); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } } - /* make sure the parent exists and is of the appropriate type */ - delim = strchr(path, '@'); - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; + nvlist_free(props); + nvlist_free(errors); + return (ret); +} + +int +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, + nvlist_t *props) +{ + int ret; + snapdata_t sd = { 0 }; + char fsname[ZFS_MAXNAMELEN]; + char *cp; + zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot snapshot %s"), path); + + if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + (void) strlcpy(fsname, path, sizeof (fsname)); + cp = strchr(fsname, '@'); + *cp = '\0'; + sd.sd_snapname = cp + 1; - if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | + if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { - zcmd_free_nvlists(&zc); return (-1); } - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value)); - if (ZFS_IS_VOLUME(zhp)) - zc.zc_objset_type = DMU_OST_ZVOL; - else - zc.zc_objset_type = DMU_OST_ZFS; - zc.zc_cookie = recursive; - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc); - - zcmd_free_nvlists(&zc); - - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. - */ - if (ret != 0) { - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); - (void) zfs_standard_error(hdl, errno, errbuf); + verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); + if (recursive) { + (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); + } else { + fnvlist_add_boolean(sd.sd_nvl, path); } + ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); zfs_close(zhp); - return (ret); } @@ -3506,17 +3525,13 @@ rollback_destroy(zfs_handle_t *zhp, void *data) zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - char *logstr; cbp->cb_dependent = B_TRUE; cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy, cbp); cbp->cb_dependent = B_FALSE; - logstr = zhp->zfs_hdl->libzfs_log_str; - zhp->zfs_hdl->libzfs_log_str = NULL; cbp->cb_error |= zfs_destroy(zhp, B_FALSE); - zhp->zfs_hdl->libzfs_log_str = logstr; } } else { /* We must destroy this clone; first unmount it */ @@ -4120,7 +4135,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zc.zc_nvlist_dst_size = sizeof (buf); if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN + 32]; + char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -4142,37 +4157,94 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, return (0); } +struct holdarg { + nvlist_t *nvl; + const char *snapname; + const char *tag; + boolean_t recursive; +}; + +static int +zfs_hold_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + zfs_handle_t *szhp; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + szhp = make_dataset_handle(zhp->zfs_hdl, name); + if (szhp) { + fnvlist_add_string(ha->nvl, name, ha->tag); + zfs_close(szhp); + } + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); + zfs_close(zhp); + return (rv); +} + int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, - int cleanup_fd, uint64_t dsobj, uint64_t createtxg) + boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) { - zfs_cmd_t zc = { 0 }; + int ret; + struct holdarg ha; + nvlist_t *errors; libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; - ASSERT(!recursive || dsobj == 0); + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) - >= sizeof (zc.zc_string)) - return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); - zc.zc_cookie = recursive; - zc.zc_temphold = temphold; - zc.zc_cleanup_fd = cleanup_fd; - zc.zc_sendobj = dsobj; - zc.zc_createtxg = createtxg; - - if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN+32]; + if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + fnvlist_free(ha.nvl); + ret = ENOENT; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(hdl, ret, errbuf); + return (ret); + } - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. - */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot hold '%s@%s'"), zc.zc_name, snapname); - switch (errno) { + ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + fnvlist_free(ha.nvl); + + if (ret == 0) + return (0); + + if (nvlist_next_nvpair(errors, NULL) == NULL) { + /* no hold-specific errors */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot hold")); + switch (ret) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } + + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s'"), nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { case E2BIG: /* * Temporary tags wind up having the ds object id @@ -4180,66 +4252,131 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, * above, it's still possible for the tag to wind * up being slightly too long. */ - return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf)); - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); + break; case EINVAL: - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; case EEXIST: - return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); + (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); + break; case ENOENT: if (enoent_ok) return (ENOENT); /* FALLTHROUGH */ default: - return (zfs_standard_error_fmt(hdl, errno, errbuf)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); } } - return (0); + fnvlist_free(errors); + return (ret); +} + +struct releasearg { + nvlist_t *nvl; + const char *snapname; + const char *tag; + boolean_t recursive; +}; + +static int +zfs_release_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + zfs_handle_t *szhp; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + szhp = make_dataset_handle(zhp->zfs_hdl, name); + if (szhp) { + nvlist_t *holds = fnvlist_alloc(); + fnvlist_add_boolean(holds, ha->tag); + fnvlist_add_nvlist(ha->nvl, name, holds); + zfs_close(szhp); + } + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); + zfs_close(zhp); + return (rv); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { - zfs_cmd_t zc = { 0 }; + int ret; + struct holdarg ha; + nvlist_t *errors; + nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) - >= sizeof (zc.zc_string)) - return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); - zc.zc_cookie = recursive; + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + (void) zfs_release_one(zfs_handle_dup(zhp), &ha); + + if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + fnvlist_free(ha.nvl); + ret = ENOENT; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot release hold from snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(hdl, ret, errbuf); + return (ret); + } - if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN+32]; + ret = lzc_release(ha.nvl, &errors); + fnvlist_free(ha.nvl); - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. - */ + if (ret == 0) + return (0); + + if (nvlist_next_nvpair(errors, NULL) == NULL) { + /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot release '%s' from '%s@%s'"), tag, zc.zc_name, - snapname); + "cannot release")); switch (errno) { - case ESRCH: - return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); - return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + default: + (void) zfs_standard_error_fmt(hdl, errno, errbuf); + } + } + + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot release hold from snapshot '%s'"), + nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { + case ESRCH: + (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); + break; case EINVAL: - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; default: - return (zfs_standard_error_fmt(hdl, errno, errbuf)); + (void) zfs_standard_error_fmt(hdl, + fnvpair_value_int32(elem), errbuf); } } - return (0); + fnvlist_free(errors); + return (ret); } int @@ -4250,7 +4387,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) int nvsz = 2048; void *nvbuf; int err = 0; - char errbuf[ZFS_MAXNAMELEN+32]; + char errbuf[1024]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); @@ -4315,7 +4452,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; - char errbuf[ZFS_MAXNAMELEN+32]; + char errbuf[1024]; size_t nvsz; int err; @@ -4366,38 +4503,18 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = { 0 }; - libzfs_handle_t *hdl = zhp->zfs_hdl; - int nvsz = 2048; - void *nvbuf; - int err = 0; - char errbuf[ZFS_MAXNAMELEN+32]; - - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - -tryagain: - - nvbuf = malloc(nvsz); - if (nvbuf == NULL) { - err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); - goto out; - } + int err; + char errbuf[1024]; - zc.zc_nvlist_dst_size = nvsz; - zc.zc_nvlist_dst = (uintptr_t)nvbuf; + err = lzc_get_holds(zhp->zfs_name, nvl); - (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + if (err != 0) { + libzfs_handle_t *hdl = zhp->zfs_hdl; - if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), - zc.zc_name); - switch (errno) { - case ENOMEM: - free(nvbuf); - nvsz = zc.zc_nvlist_dst_size; - goto tryagain; - + zhp->zfs_name); + switch (err) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); @@ -4413,19 +4530,8 @@ tryagain: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } - } else { - /* success */ - int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); - if (rc) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), - zc.zc_name); - err = zfs_standard_error_fmt(hdl, rc, errbuf); - } } - free(nvbuf); -out: return (err); } diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h index be07187f9..11a57a98a 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h @@ -23,12 +23,12 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. */ -#ifndef _LIBFS_IMPL_H -#define _LIBFS_IMPL_H +#ifndef _LIBZFS_IMPL_H +#define _LIBZFS_IMPL_H #include #include @@ -39,8 +39,8 @@ #include #include #include - -#include "zfs_ioctl_compat.h" +#include +#include #ifdef __cplusplus extern "C" { @@ -70,7 +70,6 @@ struct libzfs_handle { int libzfs_desc_active; char libzfs_action[1024]; char libzfs_desc[1024]; - char *libzfs_log_str; int libzfs_printerr; int libzfs_storeerr; /* stuff error messages into buffer */ void *libzfs_sharehdl; /* libshare handle */ @@ -215,62 +214,8 @@ extern int zfs_unshare_proto(zfs_handle_t *, extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); -#ifndef sun -static int zfs_kernel_version = 0; -static int zfs_ioctl_version = 0; - -/* - * This is FreeBSD version of ioctl, because Solaris' ioctl() updates - * zc_nvlist_dst_size even if an error is returned, on FreeBSD if an - * error is returned zc_nvlist_dst_size won't be updated. - */ -static __inline int -zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc) -{ - size_t oldsize, zfs_kernel_version_size, zfs_ioctl_version_size; - int version, ret, cflag = ZFS_CMD_COMPAT_NONE; - - zfs_ioctl_version_size = sizeof(zfs_ioctl_version); - if (zfs_ioctl_version == 0) { - sysctlbyname("vfs.zfs.version.ioctl", &zfs_ioctl_version, - &zfs_ioctl_version_size, NULL, 0); - } - - /* - * If vfs.zfs.version.ioctl is not defined, assume we have v28 - * compatible binaries and use vfs.zfs.version.spa to test for v15 - */ - if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) { - cflag = ZFS_CMD_COMPAT_V28; - zfs_kernel_version_size = sizeof(zfs_kernel_version); - - if (zfs_kernel_version == 0) { - sysctlbyname("vfs.zfs.version.spa", - &zfs_kernel_version, - &zfs_kernel_version_size, NULL, 0); - } - - if (zfs_kernel_version == SPA_VERSION_15 || - zfs_kernel_version == SPA_VERSION_14 || - zfs_kernel_version == SPA_VERSION_13) - cflag = ZFS_CMD_COMPAT_V15; - } - - oldsize = zc->zc_nvlist_dst_size; - ret = zcmd_ioctl_compat(fd, cmd, zc, cflag); - - if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) { - ret = -1; - errno = ENOMEM; - } - - return (ret); -} -#define ioctl(fd, cmd, zc) zcmd_ioctl((fd), (cmd), (zc)) -#endif /* !sun */ - #ifdef __cplusplus } #endif -#endif /* _LIBFS_IMPL_H */ +#endif /* _LIBZFS_IMPL_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c index a4db909e1..278bfd4cf 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_iter.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * All rights reserved. */ @@ -308,12 +308,11 @@ int zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, zfs_iter_f func, void *arg) { - char buf[ZFS_MAXNAMELEN]; - char *comma_separated, *cp; + char *buf, *comma_separated, *cp; int err = 0; int ret = 0; - (void) strlcpy(buf, spec_orig, sizeof (buf)); + buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig); cp = buf; while ((comma_separated = strsep(&cp, ",")) != NULL) { @@ -371,6 +370,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, } } + free(buf); return (ret); } diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c index 03bc3e658..9b04e08c5 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1237,7 +1238,7 @@ create_failed: * datasets left in the pool. */ int -zpool_destroy(zpool_handle_t *zhp) +zpool_destroy(zpool_handle_t *zhp, const char *log_str) { zfs_cmd_t zc = { 0 }; zfs_handle_t *zfp = NULL; @@ -1249,6 +1250,7 @@ zpool_destroy(zpool_handle_t *zhp) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1403,8 +1405,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) * Exports the pool from the system. The caller must ensure that there are no * mounted datasets in the pool. */ -int -zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) +static int +zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, + const char *log_str) { zfs_cmd_t zc = { 0 }; char msg[1024]; @@ -1415,6 +1418,7 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = force; zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { switch (errno) { @@ -1436,15 +1440,15 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) } int -zpool_export(zpool_handle_t *zhp, boolean_t force) +zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) { - return (zpool_export_common(zhp, force, B_FALSE)); + return (zpool_export_common(zhp, force, B_FALSE, log_str)); } int -zpool_export_force(zpool_handle_t *zhp) +zpool_export_force(zpool_handle_t *zhp, const char *log_str) { - return (zpool_export_common(zhp, B_TRUE, B_TRUE)); + return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); } static void @@ -3632,40 +3636,30 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) } void -zpool_set_history_str(const char *subcommand, int argc, char **argv, - char *history_str) +zfs_save_arguments(int argc, char **argv, char *string, int len) { - int i; - - (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN); - for (i = 1; i < argc; i++) { - if (strlen(history_str) + 1 + strlen(argv[i]) > - HIS_MAX_RECORD_LEN) - break; - (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN); - (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN); + (void) strlcpy(string, basename(argv[0]), len); + for (int i = 1; i < argc; i++) { + (void) strlcat(string, " ", len); + (void) strlcat(string, argv[i], len); } } -/* - * Stage command history for logging. - */ int -zpool_stage_history(libzfs_handle_t *hdl, const char *history_str) +zpool_log_history(libzfs_handle_t *hdl, const char *message) { - if (history_str == NULL) - return (EINVAL); - - if (strlen(history_str) > HIS_MAX_RECORD_LEN) - return (EINVAL); - - if (hdl->libzfs_log_str != NULL) - free(hdl->libzfs_log_str); - - if ((hdl->libzfs_log_str = strdup(history_str)) == NULL) - return (no_memory(hdl)); - - return (0); + zfs_cmd_t zc = { 0 }; + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "message", message); + err = zcmd_write_src_nvlist(hdl, &zc, args); + if (err == 0) + err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); + nvlist_free(args); + zcmd_free_nvlists(&zc); + return (err); } /* diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c index 6bb16ac73..595b1ad37 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -53,6 +53,10 @@ #include #include +#ifdef __FreeBSD__ +extern int zfs_ioctl_version; +#endif + /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); /* We need to use something for ENODATA. */ @@ -978,9 +982,7 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) */ if (pzhp) { error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, - zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), - zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); + B_FALSE, B_TRUE, sdd->cleanup_fd); zfs_close(pzhp); } @@ -1719,12 +1721,11 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, err = ENOENT; } - if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { + if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { seq++; - (void) strncpy(newname, name, baselen); - (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, - "recv-%u-%u", getpid(), seq); + (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u", + baselen, name, getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); if (flags->verbose) { @@ -2649,9 +2650,17 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, /* * Determine name of destination snapshot, store in zc_value. */ - (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); +#ifdef __FreeBSD__ + if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) + zfs_ioctl_version = get_zfs_ioctl_version(); + /* + * For forward compatibility hide tosnap in zc_value + */ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + (void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap); +#endif free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c index 0db2dfd1b..7b2f1c7e9 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_util.c @@ -48,6 +48,7 @@ #include #include +#include #include "libzfs_impl.h" #include "zfs_prop.h" @@ -657,6 +658,14 @@ libzfs_init(void) hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r"); + if (libzfs_core_init() != 0) { + (void) close(hdl->libzfs_fd); + (void) fclose(hdl->libzfs_mnttab); + (void) fclose(hdl->libzfs_sharetab); + free(hdl); + return (NULL); + } + zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); @@ -674,14 +683,13 @@ libzfs_fini(libzfs_handle_t *hdl) if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); zfs_uninit_libshare(hdl); - if (hdl->libzfs_log_str) - (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); #ifdef sun libzfs_fru_clear(hdl, B_TRUE); #endif namespace_clear(hdl); libzfs_mnttab_fini(hdl); + libzfs_core_fini(); free(hdl); } @@ -855,19 +863,9 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) } int -zfs_ioctl(libzfs_handle_t *hdl, unsigned long request, zfs_cmd_t *zc) +zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) { - int error; - - zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str; - error = ioctl(hdl->libzfs_fd, request, zc); - if (hdl->libzfs_log_str) { - free(hdl->libzfs_log_str); - hdl->libzfs_log_str = NULL; - } - zc->zc_history = 0; - - return (error); + return (ioctl(hdl->libzfs_fd, request, zc)); } /* diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c new file mode 100644 index 000000000..ab23aa1ad --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c @@ -0,0 +1,618 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. + * It has the following characteristics: + * + * - Thread Safe. libzfs_core is accessible concurrently from multiple + * threads. This is accomplished primarily by avoiding global data + * (e.g. caching). Since it's thread-safe, there is no reason for a + * process to have multiple libzfs "instances". Therefore, we store + * our few pieces of data (e.g. the file descriptor) in global + * variables. The fd is reference-counted so that the libzfs_core + * library can be "initialized" multiple times (e.g. by different + * consumers within the same process). + * + * - Committed Interface. The libzfs_core interface will be committed, + * therefore consumers can compile against it and be confident that + * their code will continue to work on future releases of this code. + * Currently, the interface is Evolving (not Committed), but we intend + * to commit to it once it is more complete and we determine that it + * meets the needs of all consumers. + * + * - Programatic Error Handling. libzfs_core communicates errors with + * defined error numbers, and doesn't print anything to stdout/stderr. + * + * - Thin Layer. libzfs_core is a thin layer, marshaling arguments + * to/from the kernel ioctls. There is generally a 1:1 correspondence + * between libzfs_core functions and ioctls to /dev/zfs. + * + * - Clear Atomicity. Because libzfs_core functions are generally 1:1 + * with kernel ioctls, and kernel ioctls are general atomic, each + * libzfs_core function is atomic. For example, creating multiple + * snapshots with a single call to lzc_snapshot() is atomic -- it + * can't fail with only some of the requested snapshots created, even + * in the event of power loss or system crash. + * + * - Continued libzfs Support. Some higher-level operations (e.g. + * support for "zfs send -R") are too complicated to fit the scope of + * libzfs_core. This functionality will continue to live in libzfs. + * Where appropriate, libzfs will use the underlying atomic operations + * of libzfs_core. For example, libzfs may implement "zfs send -R | + * zfs receive" by using individual "send one snapshot", rename, + * destroy, and "receive one snapshot" operations in libzfs_core. + * /sbin/zfs and /zbin/zpool will link with both libzfs and + * libzfs_core. Other consumers should aim to use only libzfs_core, + * since that will be the supported, stable interface going forwards. + */ + +#define _IN_LIBZFS_CORE_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libzfs_core_compat.h" +#include "libzfs_compat.h" + +#ifdef __FreeBSD__ +extern int zfs_ioctl_version; +#endif + +static int g_fd; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static int g_refcount; + +int +libzfs_core_init(void) +{ + (void) pthread_mutex_lock(&g_lock); + if (g_refcount == 0) { + g_fd = open("/dev/zfs", O_RDWR); + if (g_fd < 0) { + (void) pthread_mutex_unlock(&g_lock); + return (errno); + } + } + g_refcount++; + (void) pthread_mutex_unlock(&g_lock); + + return (0); +} + +void +libzfs_core_fini(void) +{ + (void) pthread_mutex_lock(&g_lock); + ASSERT3S(g_refcount, >, 0); + g_refcount--; + if (g_refcount == 0) + (void) close(g_fd); + (void) pthread_mutex_unlock(&g_lock); +} + +static int +lzc_ioctl(zfs_ioc_t ioc, const char *name, + nvlist_t *source, nvlist_t **resultp) +{ + zfs_cmd_t zc = { 0 }; + int error = 0; + char *packed; +#ifdef __FreeBSD__ + nvlist_t *oldsource; +#endif + size_t size; + + ASSERT3S(g_refcount, >, 0); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + +#ifdef __FreeBSD__ + if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) + zfs_ioctl_version = get_zfs_ioctl_version(); + + if (zfs_ioctl_version < ZFS_IOCVER_LZC) { + oldsource = source; + error = lzc_compat_pre(&zc, &ioc, &source); + if (error) + return (error); + } +#endif + + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + + if (resultp != NULL) { + *resultp = NULL; + zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); +#ifdef illumos + if (zc.zc_nvlist_dst == NULL) { +#else + if (zc.zc_nvlist_dst == 0) { +#endif + error = ENOMEM; + goto out; + } + } + + while (ioctl(g_fd, ioc, &zc) != 0) { + if (errno == ENOMEM && resultp != NULL) { + free((void *)(uintptr_t)zc.zc_nvlist_dst); + zc.zc_nvlist_dst_size *= 2; + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); +#ifdef illumos + if (zc.zc_nvlist_dst == NULL) { +#else + if (zc.zc_nvlist_dst == 0) { +#endif + error = ENOMEM; + goto out; + } + } else { + error = errno; + break; + } + } + +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + lzc_compat_post(&zc, ioc); +#endif + if (zc.zc_nvlist_dst_filled) { + *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size); + } +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) + lzc_compat_outnvl(&zc, ioc, resultp); +#endif +out: +#ifdef __FreeBSD__ + if (zfs_ioctl_version < ZFS_IOCVER_LZC) { + if (source != oldsource) + nvlist_free(source); + source = oldsource; + } +#endif + fnvlist_pack_free(packed, size); + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} + +int +lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", type); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +int +lzc_clone(const char *fsname, const char *origin, + nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +/* + * Creates snapshots. + * + * The keys in the snaps nvlist are the snapshots to be created. + * They must all be in the same pool. + * + * The props nvlist is properties to set. Currently only user properties + * are supported. { user:prop_name -> string value } + * + * The returned results nvlist will have an entry for each snapshot that failed. + * The value will be the (int32) error code. + * + * The return value will be 0 if all snapshots were created, otherwise it will + * be the errno of a (unspecified) snapshot that failed. + */ +int +lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + *errlist = NULL; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + + error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); + nvlist_free(args); + + return (error); +} + +/* + * Destroys snapshots. + * + * The keys in the snaps nvlist are the snapshots to be destroyed. + * They must all be in the same pool. + * + * Snapshots that do not exist will be silently ignored. + * + * If 'defer' is not set, and a snapshot has user holds or clones, the + * destroy operation will fail and none of the snapshots will be + * destroyed. + * + * If 'defer' is set, and a snapshot has user holds or clones, it will be + * marked for deferred destruction, and will be destroyed when the last hold + * or clone is removed/destroyed. + * + * The return value will be 0 if all snapshots were destroyed (or marked for + * later destruction if 'defer' is set) or didn't exist to begin with. + * + * Otherwise the return value will be the errno of a (unspecified) snapshot + * that failed, no snapshots will be destroyed, and the errlist will have an + * entry for each snapshot that failed. The value in the errlist will be + * the (int32) error code. + */ +int +lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (defer) + fnvlist_add_boolean(args, "defer"); + + error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); + nvlist_free(args); + + return (error); + +} + +int +lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp) +{ + nvlist_t *args; + nvlist_t *result; + int err; + char fs[MAXNAMELEN]; + char *atp; + + /* determine the fs name */ + (void) strlcpy(fs, firstsnap, sizeof (fs)); + atp = strchr(fs, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "firstsnap", firstsnap); + + err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); + nvlist_free(args); + if (err == 0) + *usedp = fnvlist_lookup_uint64(result, "used"); + fnvlist_free(result); + + return (err); +} + +boolean_t +lzc_exists(const char *dataset) +{ + /* + * The objset_stats ioctl is still legacy, so we need to construct our + * own zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = { 0 }; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); +} + +/* + * Create "user holds" on snapshots. If there is a hold on a snapshot, + * the snapshot can not be destroyed. (However, it can be marked for deletion + * by lzc_destroy_snaps(defer=B_TRUE).) + * + * The keys in the nvlist are snapshot names. + * The snapshots must all be in the same pool. + * The value is the name of the hold (string type). + * + * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). + * In this case, when the cleanup_fd is closed (including on process + * termination), the holds will be released. If the system is shut down + * uncleanly, the holds will be released when the pool is next opened + * or imported. + * + * The return value will be 0 if all holds were created. Otherwise the return + * value will be the errno of a (unspecified) hold that failed, no holds will + * be created, and the errlist will have an entry for each hold that + * failed (name = snapshot). The value in the errlist will be the error + * code (int32). + */ +int +lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) +{ + char pool[MAXNAMELEN]; + nvlist_t *args; + nvpair_t *elem; + int error; + + /* determine the pool name */ + elem = nvlist_next_nvpair(holds, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "holds", holds); + if (cleanup_fd != -1) + fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); + + error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); + nvlist_free(args); + return (error); +} + +/* + * Release "user holds" on snapshots. If the snapshot has been marked for + * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have + * any clones, and all the user holds are removed, then the snapshot will be + * destroyed. + * + * The keys in the nvlist are snapshot names. + * The snapshots must all be in the same pool. + * The value is a nvlist whose keys are the holds to remove. + * + * The return value will be 0 if all holds were removed. + * Otherwise the return value will be the errno of a (unspecified) release + * that failed, no holds will be released, and the errlist will have an + * entry for each snapshot that has failed releases (name = snapshot). + * The value in the errlist will be the error code (int32) of a failed release. + */ +int +lzc_release(nvlist_t *holds, nvlist_t **errlist) +{ + char pool[MAXNAMELEN]; + nvpair_t *elem; + + /* determine the pool name */ + elem = nvlist_next_nvpair(holds, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); +} + +/* + * Retrieve list of user holds on the specified snapshot. + * + * On success, *holdsp will be set to a nvlist which the caller must free. + * The keys are the names of the holds, and the value is the creation time + * of the hold (uint64) in seconds since the epoch. + */ +int +lzc_get_holds(const char *snapname, nvlist_t **holdsp) +{ + int error; + nvlist_t *innvl = fnvlist_alloc(); + error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp); + fnvlist_free(innvl); + return (error); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be sent. + */ +int +lzc_send(const char *snapname, const char *fromsnap, int fd) +{ + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_int32(args, "fd", fd); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); + nvlist_free(args); + return (err); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be estimated. + */ +int +lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +{ + nvlist_t *args; + nvlist_t *result; + int err; + + args = fnvlist_alloc(); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); + nvlist_free(args); + if (err == 0) + *spacep = fnvlist_lookup_uint64(result, "space"); + nvlist_free(result); + return (err); +} + +static int +recv_read(int fd, void *buf, int ilen) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) + return (EIO); + + return (0); +} + +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot. Apply the specified properties a "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). + */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + /* + * The receive ioctl is still legacy, so we need to construct our own + * zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = { 0 }; + char *atp; + char *packed = NULL; + size_t size; + dmu_replay_record_t drr; + int error; + + ASSERT3S(g_refcount, >, 0); + + /* zc_name is name of containing filesystem */ + (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); + atp = strchr(zc.zc_name, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + /* if the fs does not exist, try its parent. */ + if (!lzc_exists(zc.zc_name)) { + char *slashp = strrchr(zc.zc_name, '/'); + if (slashp == NULL) + return (ENOENT); + *slashp = '\0'; + + } + + /* zc_value is full name of the snapshot to create */ + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + + if (props != NULL) { + /* zc_nvlist_src is props to set */ + packed = fnvlist_pack(props, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } + + /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ + if (origin != NULL) + (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + + /* zc_begin_record is non-byteswapped BEGIN record */ + error = recv_read(fd, &drr, sizeof (drr)); + if (error != 0) + goto out; + zc.zc_begin_record = drr.drr_u.drr_begin; + + /* zc_cookie is fd to read from */ + zc.zc_cookie = fd; + + /* zc guid is force flag */ + zc.zc_guid = force; + + /* zc_cleanup_fd is unused */ + zc.zc_cleanup_fd = -1; + + error = ioctl(g_fd, ZFS_IOC_RECV, &zc); + if (error != 0) + error = errno; + +out: + if (packed != NULL) + fnvlist_pack_free(packed, size); + free((void*)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h new file mode 100644 index 000000000..b10098b37 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Martin Matuska . All rights reserved. + */ + +#ifndef _LIBZFS_CORE_H +#define _LIBZFS_CORE_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int libzfs_core_init(void); +void libzfs_core_fini(void); + +int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist); +int lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props); +int lzc_clone(const char *fsname, const char *origin, nvlist_t *props); +int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist); + +int lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp); + +int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist); +int lzc_release(nvlist_t *holds, nvlist_t **errlist); +int lzc_get_holds(const char *snapname, nvlist_t **holdsp); + +int lzc_send(const char *snapname, const char *fromsnap, int fd); +int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd); +int lzc_send_space(const char *snapname, const char *fromsnap, + uint64_t *result); + +boolean_t lzc_exists(const char *dataset); + + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_CORE_H */ diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c new file mode 100644 index 000000000..a3b872ee2 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.c @@ -0,0 +1,189 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 Martin Matuska . All rights reserved. + */ + +#include +#include +#include "libzfs_core_compat.h" + +extern int zfs_ioctl_version; + +int +lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source) +{ + nvlist_t *nvl = NULL; + nvpair_t *pair, *hpair; + char *buf, *val; + zfs_ioc_t vecnum; + uint32_t type32; + int32_t cleanup_fd; + int error = 0; + int pos; + + if (zfs_ioctl_version >= ZFS_IOCVER_LZC) + return (0); + + vecnum = *ioc; + + switch (vecnum) { + case ZFS_IOC_CREATE: + type32 = fnvlist_lookup_int32(*source, "type"); + zc->zc_objset_type = (uint64_t)type32; + nvlist_lookup_nvlist(*source, "props", &nvl); + *source = nvl; + break; + case ZFS_IOC_CLONE: + buf = fnvlist_lookup_string(*source, "origin"); + strlcpy(zc->zc_value, buf, MAXPATHLEN); + nvlist_lookup_nvlist(*source, "props", &nvl); + *ioc = ZFS_IOC_CREATE; + *source = nvl; + break; + case ZFS_IOC_SNAPSHOT: + nvl = fnvlist_lookup_nvlist(*source, "snaps"); + pair = nvlist_next_nvpair(nvl, NULL); + if (pair != NULL) { + buf = nvpair_name(pair); + pos = strcspn(buf, "@"); + strlcpy(zc->zc_name, buf, pos + 1); + strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); + } else + error = EINVAL; + /* old kernel cannot create multiple snapshots */ + if (!error && nvlist_next_nvpair(nvl, pair) != NULL) + error = EOPNOTSUPP; + nvlist_free(nvl); + nvl = NULL; + nvlist_lookup_nvlist(*source, "props", &nvl); + *source = nvl; + break; + case ZFS_IOC_SPACE_SNAPS: + buf = fnvlist_lookup_string(*source, "firstsnap"); + strlcpy(zc->zc_value, buf, MAXPATHLEN); + break; + case ZFS_IOC_DESTROY_SNAPS: + nvl = fnvlist_lookup_nvlist(*source, "snaps"); + pair = nvlist_next_nvpair(nvl, NULL); + if (pair != NULL) { + buf = nvpair_name(pair); + pos = strcspn(buf, "@"); + strlcpy(zc->zc_name, buf, pos + 1); + } else + error = EINVAL; + /* old kernel cannot atomically destroy multiple snaps */ + if (!error && nvlist_next_nvpair(nvl, pair) != NULL) + error = EOPNOTSUPP; + *source = nvl; + break; + case ZFS_IOC_HOLD: + nvl = fnvlist_lookup_nvlist(*source, "holds"); + pair = nvlist_next_nvpair(nvl, NULL); + if (pair != NULL) { + buf = nvpair_name(pair); + pos = strcspn(buf, "@"); + strlcpy(zc->zc_name, buf, pos + 1); + strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); + if (nvpair_value_string(pair, &val) == 0) + strlcpy(zc->zc_string, val, MAXNAMELEN); + else + error = EINVAL; + } else + error = EINVAL; + /* old kernel cannot atomically create multiple holds */ + if (!error && nvlist_next_nvpair(nvl, pair) != NULL) + error = EOPNOTSUPP; + nvlist_free(nvl); + if (nvlist_lookup_int32(*source, "cleanup_fd", + &cleanup_fd) == 0) + zc->zc_cleanup_fd = cleanup_fd; + else + zc->zc_cleanup_fd = -1; + break; + case ZFS_IOC_RELEASE: + pair = nvlist_next_nvpair(*source, NULL); + if (pair != NULL) { + buf = nvpair_name(pair); + pos = strcspn(buf, "@"); + strlcpy(zc->zc_name, buf, pos + 1); + strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN); + if (nvpair_value_nvlist(pair, &nvl) == 0) { + hpair = nvlist_next_nvpair(nvl, NULL); + if (hpair != NULL) + strlcpy(zc->zc_string, + nvpair_name(hpair), MAXNAMELEN); + else + error = EINVAL; + if (!error && nvlist_next_nvpair(nvl, + hpair) != NULL) + error = EOPNOTSUPP; + } else + error = EINVAL; + } else + error = EINVAL; + /* old kernel cannot atomically release multiple holds */ + if (!error && nvlist_next_nvpair(nvl, pair) != NULL) + error = EOPNOTSUPP; + break; + } + + return (error); +} + +void +lzc_compat_post(zfs_cmd_t *zc, const zfs_ioc_t ioc) +{ + if (zfs_ioctl_version >= ZFS_IOCVER_LZC) + return; + + switch (ioc) { + case ZFS_IOC_CREATE: + case ZFS_IOC_CLONE: + case ZFS_IOC_SNAPSHOT: + case ZFS_IOC_SPACE_SNAPS: + case ZFS_IOC_DESTROY_SNAPS: + zc->zc_nvlist_dst_filled = B_FALSE; + break; + } +} + +int +lzc_compat_outnvl(zfs_cmd_t *zc, const zfs_ioc_t ioc, nvlist_t **outnvl) +{ + nvlist_t *nvl; + + if (zfs_ioctl_version >= ZFS_IOCVER_LZC) + return (0); + + switch (ioc) { + case ZFS_IOC_SPACE_SNAPS: + nvl = fnvlist_alloc(); + fnvlist_add_uint64(nvl, "used", zc->zc_cookie); + fnvlist_add_uint64(nvl, "compressed", zc->zc_objset_type); + fnvlist_add_uint64(nvl, "uncompressed", zc->zc_perm_action); + *outnvl = nvl; + break; + } + + return (0); +} diff --git a/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h new file mode 100644 index 000000000..6527c4b25 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core_compat.h @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 by Martin Matuska . All rights reserved. + */ + +#ifndef _LIBZFS_CORE_COMPAT_H +#define _LIBZFS_CORE_COMPAT_H + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int lzc_compat_pre(zfs_cmd_t *, zfs_ioc_t *, nvlist_t **); +void lzc_compat_post(zfs_cmd_t *, const zfs_ioc_t); +int lzc_compat_outnvl(zfs_cmd_t *, const zfs_ioc_t, nvlist_t **); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_CORE_COMPAT_H */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c index 25dc520b7..c5c7b6614 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c +++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -885,6 +886,8 @@ umem_out_of_memory(void) void kernel_init(int mode) { + extern uint_t rrw_tsd_key; + umem_nofail_callback(umem_out_of_memory); physmem = sysconf(_SC_PHYS_PAGES); @@ -905,6 +908,8 @@ kernel_init(int mode) #endif spa_init(mode); + + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); } void @@ -952,6 +957,12 @@ crgetuid(cred_t *cr) return (0); } +uid_t +crgetruid(cred_t *cr) +{ + return (0); +} + gid_t crgetgid(cred_t *cr) { diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h index eaacc2175..58106ebf5 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h +++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -60,6 +61,8 @@ extern "C" { #include #include #include +#include +#include #include #include #include @@ -84,6 +87,9 @@ extern "C" { #include #include #include +#ifdef illumos +#include "zfs.h" +#endif #define ZFS_EXPORTS_PATH "/etc/zfs/exports" @@ -131,28 +137,64 @@ extern int aok; #ifdef DTRACE_PROBE #undef DTRACE_PROBE -#define DTRACE_PROBE(a) ((void)0) #endif /* DTRACE_PROBE */ +#ifdef illumos +#define DTRACE_PROBE(a) \ + ZFS_PROBE0(#a) +#endif #ifdef DTRACE_PROBE1 #undef DTRACE_PROBE1 -#define DTRACE_PROBE1(a, b, c) ((void)0) #endif /* DTRACE_PROBE1 */ +#ifdef illumos +#define DTRACE_PROBE1(a, b, c) \ + ZFS_PROBE1(#a, (unsigned long)c) +#endif #ifdef DTRACE_PROBE2 #undef DTRACE_PROBE2 -#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) #endif /* DTRACE_PROBE2 */ +#ifdef illumos +#define DTRACE_PROBE2(a, b, c, d, e) \ + ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e) +#endif #ifdef DTRACE_PROBE3 #undef DTRACE_PROBE3 -#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) #endif /* DTRACE_PROBE3 */ +#ifdef illumos +#define DTRACE_PROBE3(a, b, c, d, e, f, g) \ + ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g) +#endif #ifdef DTRACE_PROBE4 #undef DTRACE_PROBE4 -#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) #endif /* DTRACE_PROBE4 */ +#ifdef illumos +#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \ + ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \ + (unsigned long)i) +#endif + +#ifdef illumos +/* + * We use the comma operator so that this macro can be used without much + * additional code. For example, "return (EINVAL);" becomes + * "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated + * twice, so it should not have side effects (e.g. something like: + * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice). + */ +#define SET_ERROR(err) (ZFS_SET_ERROR(err), err) +#else /* !illumos */ + +#define DTRACE_PROBE(a) ((void)0) +#define DTRACE_PROBE1(a, b, c) ((void)0) +#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) +#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) +#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) + +#define SET_ERROR(err) (err) +#endif /* !illumos */ /* * Threads @@ -242,6 +284,9 @@ typedef int krw_t; #define RW_WRITE_HELD(x) ((x)->rw_owner == curthread) #define RW_LOCK_HELD(x) rw_lock_held(x) +#undef RW_LOCK_HELD +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) + extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); extern void rw_enter(krwlock_t *rwlp, krw_t rw); @@ -252,6 +297,7 @@ extern int rw_lock_held(krwlock_t *rwlp); #define rw_downgrade(rwlp) do { } while (0) extern uid_t crgetuid(cred_t *cr); +extern uid_t crgetruid(cred_t *cr); extern gid_t crgetgid(cred_t *cr); extern int crgetngroups(cred_t *cr); extern gid_t *crgetgroups(cred_t *cr); @@ -270,6 +316,14 @@ extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); +/* + * Thread-specific data + */ +#define tsd_get(k) pthread_getspecific(k) +#define tsd_set(k, v) pthread_setspecific(k, v) +#define tsd_create(kp, d) pthread_key_create(kp, d) +#define tsd_destroy(kp) /* nothing */ + /* * Kernel memory */ @@ -527,7 +581,7 @@ typedef struct callb_cpr { #define INGLOBALZONE(z) (1) extern char *kmem_asprintf(const char *fmt, ...); -#define strfree(str) kmem_free((str), strlen(str)+1) +#define strfree(str) kmem_free((str), strlen(str) + 1) /* * Hostname information diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d b/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d new file mode 100644 index 000000000..1351733c8 --- /dev/null +++ b/cddl/contrib/opensolaris/lib/libzpool/common/zfs.d @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +provider zfs { + probe probe0(char *probename); + probe probe1(char *probename, unsigned long arg1); + probe probe2(char *probename, unsigned long arg1, unsigned long arg2); + probe probe3(char *probename, unsigned long arg1, unsigned long arg2, + unsigned long arg3); + probe probe4(char *probename, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4); + + probe set__error(int err); +}; + +#pragma D attributes Evolving/Evolving/ISA provider zfs provider +#pragma D attributes Private/Private/Unknown provider zfs module +#pragma D attributes Private/Private/Unknown provider zfs function +#pragma D attributes Evolving/Evolving/ISA provider zfs name +#pragma D attributes Evolving/Evolving/ISA provider zfs args diff --git a/cddl/lib/Makefile b/cddl/lib/Makefile index 52e97d74b..0fb48506d 100644 --- a/cddl/lib/Makefile +++ b/cddl/lib/Makefile @@ -9,10 +9,12 @@ SUBDIR= ${_drti} \ libnvpair \ libumem \ libuutil \ + ${_libzfs_core} \ ${_libzfs} \ ${_libzpool} .if ${MK_ZFS} != "no" +_libzfs_core= libzfs_core _libzfs= libzfs .if ${MK_LIBTHR} != "no" _libzpool= libzpool diff --git a/cddl/lib/libzfs/Makefile b/cddl/lib/libzfs/Makefile index 9e68da4c3..5b6b47dcc 100644 --- a/cddl/lib/libzfs/Makefile +++ b/cddl/lib/libzfs/Makefile @@ -6,8 +6,9 @@ .PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common LIB= zfs -DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR} -LDADD= -lmd -lpthread -lumem -lutil -lm -lnvpair +DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR} \ + ${LIBZFS_CORE} +LDADD= -lmd -lpthread -lumem -lutil -lm -lnvpair -lzfs_core SRCS= deviceid.c \ fsshare.c \ @@ -17,6 +18,7 @@ SRCS= deviceid.c \ zone.c SRCS+= libzfs_changelist.c \ + libzfs_compat.c \ libzfs_config.c \ libzfs_dataset.c \ libzfs_diff.c \ @@ -54,5 +56,6 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common .include diff --git a/cddl/lib/libzfs_core/Makefile b/cddl/lib/libzfs_core/Makefile new file mode 100644 index 000000000..a470fbc25 --- /dev/null +++ b/cddl/lib/libzfs_core/Makefile @@ -0,0 +1,37 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../cddl/compat/opensolaris/misc +.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs +.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs +.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common +.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common + +LIB= zfs_core +DPADD= ${LIBNVPAIR} +LDADD= -lnvpair + +SRCS= libzfs_core.c \ + libzfs_core_compat.c + +SRCS+= libzfs_compat.c + +WARNS?= 0 +CSTD= c99 +CFLAGS+= -DZFS_NO_ACL +CFLAGS+= -I${.CURDIR}/../../../sbin/mount +CFLAGS+= -I${.CURDIR}/../../../cddl/lib/libumem +CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris +CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include +CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common +CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs +CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs +CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head +CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common + +.include diff --git a/cddl/sbin/zfs/Makefile b/cddl/sbin/zfs/Makefile index 95a476b7a..a49d278a8 100644 --- a/cddl/sbin/zfs/Makefile +++ b/cddl/sbin/zfs/Makefile @@ -14,6 +14,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common @@ -22,7 +23,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs DPADD= ${LIBGEOM} ${LIBJAIL} ${LIBNVPAIR} ${LIBUMEM} \ - ${LIBUTIL} ${LIBUUTIL} ${LIBZFS} -LDADD= -lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs + ${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} +LDADD= -lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs .include diff --git a/cddl/sbin/zpool/Makefile b/cddl/sbin/zpool/Makefile index b9d44b613..1884d249d 100644 --- a/cddl/sbin/zpool/Makefile +++ b/cddl/sbin/zpool/Makefile @@ -27,7 +27,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/stat/common DPADD= ${LIBAVL} ${LIBGEOM} ${LIBNVPAIR} \ - ${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS} -LDADD= -lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs + ${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} +LDADD= -lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs .include diff --git a/cddl/usr.bin/zinject/Makefile b/cddl/usr.bin/zinject/Makefile index 2ea066c2c..8c5c141e5 100644 --- a/cddl/usr.bin/zinject/Makefile +++ b/cddl/usr.bin/zinject/Makefile @@ -20,7 +20,7 @@ CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head CFLAGS+= -I${.CURDIR}/../../lib/libumem DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBUUTIL} \ - ${LIBZFS} ${LIBZPOOL} -LDADD= -lgeom -lm -lnvpair -lumem -luutil -lzfs -lzpool + ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL} +LDADD= -lgeom -lm -lnvpair -lumem -luutil -lzfs_core -lzfs -lzpool .include diff --git a/cddl/usr.bin/ztest/Makefile b/cddl/usr.bin/ztest/Makefile index 42c0993b3..965300ef2 100644 --- a/cddl/usr.bin/ztest/Makefile +++ b/cddl/usr.bin/ztest/Makefile @@ -19,8 +19,9 @@ CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head CFLAGS+= -I${.CURDIR}/../../lib/libumem DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \ - ${LIBPTHREAD} ${LIBAVL} ${LIBZFS} ${LIBUUTIL} -LDADD= -lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs -luutil + ${LIBPTHREAD} ${LIBAVL} ${LIBZFS_CORE} ${LIBZFS} ${LIBUUTIL} +LDADD= -lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs_core -lzfs \ + -luutil CSTD= c99 diff --git a/cddl/usr.sbin/zdb/Makefile b/cddl/usr.sbin/zdb/Makefile index bdebda335..806838560 100644 --- a/cddl/usr.sbin/zdb/Makefile +++ b/cddl/usr.sbin/zdb/Makefile @@ -24,8 +24,8 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head CFLAGS+= -I${.CURDIR}/../../lib/libumem DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \ - ${LIBUUTIL} ${LIBZFS} ${LIBZPOOL} -LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool + ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL} +LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs_core -lzfs -lzpool CFLAGS+= -DDEBUG=1 #DEBUG_FLAGS+= -g diff --git a/cddl/usr.sbin/zhack/Makefile b/cddl/usr.sbin/zhack/Makefile index 97ef5751b..f09d2d827 100644 --- a/cddl/usr.sbin/zhack/Makefile +++ b/cddl/usr.sbin/zhack/Makefile @@ -23,8 +23,8 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head CFLAGS+= -I${.CURDIR}/../../lib/libumem DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \ - ${LIBUUTIL} ${LIBZFS} ${LIBZPOOL} -LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool + ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL} +LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs_core -lzfs -lzpool CFLAGS+= -DDEBUG=1 #DEBUG_FLAGS+= -g diff --git a/rescue/rescue/Makefile b/rescue/rescue/Makefile index b33f3f2d0..379bd704c 100644 --- a/rescue/rescue/Makefile +++ b/rescue/rescue/Makefile @@ -123,7 +123,7 @@ CRUNCH_LIBS+= -lalias -lcam -lcurses -ldevstat -lipsec CRUNCH_LIBS+= -lipx .endif .if ${MK_ZFS} != "no" -CRUNCH_LIBS+= -lavl -ljail -lzfs -lnvpair -lpthread -luutil -lumem +CRUNCH_LIBS+= -lavl -ljail -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem .endif CRUNCH_LIBS+= -lgeom -lbsdxml -ljail -lkiconv -lmd -lreadline -lsbuf -lufs -lz diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk index 9c060f954..a3dbf3b08 100644 --- a/share/mk/bsd.libnames.mk +++ b/share/mk/bsd.libnames.mk @@ -169,4 +169,5 @@ LIBY?= ${DESTDIR}${LIBDIR}/liby.a LIBYPCLNT?= ${DESTDIR}${LIBDIR}/libypclnt.a LIBZ?= ${DESTDIR}${LIBDIR}/libz.a LIBZFS?= ${DESTDIR}${LIBDIR}/libzfs.a +LIBZFS_CORE?= ${DESTDIR}${LIBDIR}/libzfs_core.a LIBZPOOL?= ${DESTDIR}${LIBDIR}/libzpool.a diff --git a/sys/cddl/compat/opensolaris/sys/cred.h b/sys/cddl/compat/opensolaris/sys/cred.h index b13ef6c62..28a8ea3a2 100644 --- a/sys/cddl/compat/opensolaris/sys/cred.h +++ b/sys/cddl/compat/opensolaris/sys/cred.h @@ -46,6 +46,7 @@ typedef struct ucred ucred_t; #define kcred (thread0.td_ucred) #define crgetuid(cred) ((cred)->cr_uid) +#define crgetruid(cred) ((cred)->cr_ruid) #define crgetgid(cred) ((cred)->cr_gid) #define crgetgroups(cred) ((cred)->cr_groups) #define crgetngroups(cred) ((cred)->cr_ngroups) diff --git a/sys/cddl/compat/opensolaris/sys/sdt.h b/sys/cddl/compat/opensolaris/sys/sdt.h index 6db45bb97..46b5b2244 100644 --- a/sys/cddl/compat/opensolaris/sys/sdt.h +++ b/sys/cddl/compat/opensolaris/sys/sdt.h @@ -41,6 +41,8 @@ #define DTRACE_PROBE1(name, type1, arg1) #define DTRACE_PROBE2(name, type1, arg1, type2, arg2) #define DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3) -#define DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) +#define DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) + +#define SET_ERROR(err) (err) #endif /* _OPENSOLARIS_SYS_SDT_H_ */ diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c b/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c index 1b67e6243..eb200a24e 100644 --- a/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c +++ b/sys/cddl/contrib/opensolaris/common/nvpair/fnvpair.c @@ -30,6 +30,8 @@ #else #include #include +#include +#include #endif /* @@ -116,6 +118,18 @@ fnvlist_merge(nvlist_t *dst, nvlist_t *src) VERIFY0(nvlist_merge(dst, src, KM_SLEEP)); } +size_t +fnvlist_num_pairs(nvlist_t *nvl) +{ + size_t count = 0; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) + count++; + return (count); +} + void fnvlist_add_boolean(nvlist_t *nvl, const char *name) { diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c index 5df687662..f7bde48a3 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -156,7 +157,11 @@ zfs_spa_version_map(int zpl_version) return (version); } -const char *zfs_history_event_names[LOG_END] = { +/* + * This is the table of legacy internal event names; it should not be modified. + * The internal events are now stored in the history log as strings. + */ +const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = { "invalid event", "pool create", "vdev add", diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h index 61327f9aa..f89054388 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_comutil.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _ZFS_COMUTIL_H @@ -37,7 +38,8 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); extern int zfs_zpl_version_map(int spa_version); extern int zfs_spa_version_map(int zpl_version); -extern const char *zfs_history_event_names[LOG_END]; +#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 +extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c index 495933540..cfa4f441d 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c @@ -33,6 +33,7 @@ #include #include #include +#include "zfs_namecheck.h" #include "zfs_ioctl_compat.h" static int zfs_version_ioctl = ZFS_IOCVER_CURRENT; @@ -49,8 +50,52 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) { zfs_cmd_v15_t *zc_c; zfs_cmd_v28_t *zc28_c; + zfs_cmd_deadman_t *zcdm_c; switch (cflag) { + case ZFS_CMD_COMPAT_DEADMAN: + zcdm_c = (void *)addr; + /* zc */ + strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN); + strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2); + strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN); + zc->zc_guid = zcdm_c->zc_guid; + zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf; + zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size; + zc->zc_nvlist_src = zcdm_c->zc_nvlist_src; + zc->zc_nvlist_src_size = zcdm_c->zc_nvlist_src_size; + zc->zc_nvlist_dst = zcdm_c->zc_nvlist_dst; + zc->zc_nvlist_dst_size = zcdm_c->zc_nvlist_dst_size; + zc->zc_cookie = zcdm_c->zc_cookie; + zc->zc_objset_type = zcdm_c->zc_objset_type; + zc->zc_perm_action = zcdm_c->zc_perm_action; + zc->zc_history = zcdm_c->zc_history; + zc->zc_history_len = zcdm_c->zc_history_len; + zc->zc_history_offset = zcdm_c->zc_history_offset; + zc->zc_obj = zcdm_c->zc_obj; + zc->zc_iflags = zcdm_c->zc_iflags; + zc->zc_share = zcdm_c->zc_share; + zc->zc_jailid = zcdm_c->zc_jailid; + zc->zc_objset_stats = zcdm_c->zc_objset_stats; + zc->zc_begin_record = zcdm_c->zc_begin_record; + zc->zc_defer_destroy = zcdm_c->zc_defer_destroy; + zc->zc_temphold = zcdm_c->zc_temphold; + zc->zc_action_handle = zcdm_c->zc_action_handle; + zc->zc_cleanup_fd = zcdm_c->zc_cleanup_fd; + zc->zc_simple = zcdm_c->zc_simple; + bcopy(zcdm_c->zc_pad, zc->zc_pad, sizeof(zc->zc_pad)); + zc->zc_sendobj = zcdm_c->zc_sendobj; + zc->zc_fromobj = zcdm_c->zc_fromobj; + zc->zc_createtxg = zcdm_c->zc_createtxg; + zc->zc_stat = zcdm_c->zc_stat; + + /* zc_inject_record doesn't change in libzfs_core */ + zcdm_c->zc_inject_record = zc->zc_inject_record; + + /* we always assume zc_nvlist_dst_filled is true */ + zc->zc_nvlist_dst_filled = B_TRUE; + break; + case ZFS_CMD_COMPAT_V28: zc28_c = (void *)addr; @@ -58,7 +103,6 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN); strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2); strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN); - strlcpy(zc->zc_top_ds, zc28_c->zc_top_ds, MAXPATHLEN); zc->zc_guid = zc28_c->zc_guid; zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf; zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size; @@ -174,19 +218,66 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) } void -zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag) +zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, + const int cflag) { zfs_cmd_v15_t *zc_c; zfs_cmd_v28_t *zc28_c; + zfs_cmd_deadman_t *zcdm_c; switch (cflag) { + case ZFS_CMD_COMPAT_DEADMAN: + zcdm_c = (void *)addr; + + strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN); + strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2); + strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN); + zcdm_c->zc_guid = zc->zc_guid; + zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf; + zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; + zcdm_c->zc_nvlist_src = zc->zc_nvlist_src; + zcdm_c->zc_nvlist_src_size = zc->zc_nvlist_src_size; + zcdm_c->zc_nvlist_dst = zc->zc_nvlist_dst; + zcdm_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size; + zcdm_c->zc_cookie = zc->zc_cookie; + zcdm_c->zc_objset_type = zc->zc_objset_type; + zcdm_c->zc_perm_action = zc->zc_perm_action; + zcdm_c->zc_history = zc->zc_history; + zcdm_c->zc_history_len = zc->zc_history_len; + zcdm_c->zc_history_offset = zc->zc_history_offset; + zcdm_c->zc_obj = zc->zc_obj; + zcdm_c->zc_iflags = zc->zc_iflags; + zcdm_c->zc_share = zc->zc_share; + zcdm_c->zc_jailid = zc->zc_jailid; + zcdm_c->zc_objset_stats = zc->zc_objset_stats; + zcdm_c->zc_begin_record = zc->zc_begin_record; + zcdm_c->zc_defer_destroy = zc->zc_defer_destroy; + zcdm_c->zc_temphold = zc->zc_temphold; + zcdm_c->zc_action_handle = zc->zc_action_handle; + zcdm_c->zc_cleanup_fd = zc->zc_cleanup_fd; + zcdm_c->zc_simple = zc->zc_simple; + bcopy(zc->zc_pad, zcdm_c->zc_pad, sizeof(zcdm_c->zc_pad)); + zcdm_c->zc_sendobj = zc->zc_sendobj; + zcdm_c->zc_fromobj = zc->zc_fromobj; + zcdm_c->zc_createtxg = zc->zc_createtxg; + zcdm_c->zc_stat = zc->zc_stat; + + /* zc_inject_record doesn't change in libzfs_core */ + zc->zc_inject_record = zcdm_c->zc_inject_record; +#ifndef _KERNEL + if (request == ZFS_IOC_RECV) + strlcpy(zcdm_c->zc_top_ds, + zc->zc_value + strlen(zc->zc_value) + 1, + (MAXPATHLEN * 2) - strlen(zc->zc_value) - 1); +#endif + break; + case ZFS_CMD_COMPAT_V28: zc28_c = (void *)addr; strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN); strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2); strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN); - strlcpy(zc28_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN); zc28_c->zc_guid = zc->zc_guid; zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf; zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; @@ -216,7 +307,12 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag) zc28_c->zc_fromobj = zc->zc_fromobj; zc28_c->zc_createtxg = zc->zc_createtxg; zc28_c->zc_stat = zc->zc_stat; - +#ifndef _KERNEL + if (request == ZFS_IOC_RECV) + strlcpy(zc28_c->zc_top_ds, + zc->zc_value + strlen(zc->zc_value) + 1, + MAXPATHLEN * 2 - strlen(zc->zc_value) - 1); +#endif /* zc_inject_record */ zc28_c->zc_inject_record.zi_objset = zc->zc_inject_record.zi_objset; @@ -476,22 +572,33 @@ zfs_ioctl_compat_pool_get_props(zfs_cmd_t *zc) #ifndef _KERNEL int -zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag) +zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag) { int nc, ret; void *zc_c; unsigned long ncmd; + zfs_iocparm_t zp; switch (cflag) { case ZFS_CMD_COMPAT_NONE: - ret = ioctl(fd, cmd, zc); - return (ret); + ncmd = _IOWR('Z', request, struct zfs_iocparm); + zp.zfs_cmd = (uint64_t)zc; + zp.zfs_cmd_size = sizeof(zfs_cmd_t); + zp.zfs_ioctl_version = ZFS_IOCVER_CURRENT; + return (ioctl(fd, ncmd, &zp)); + case ZFS_CMD_COMPAT_LZC: + ncmd = _IOWR('Z', request, struct zfs_cmd); + return (ioctl(fd, ncmd, zc)); + case ZFS_CMD_COMPAT_DEADMAN: + zc_c = malloc(sizeof(zfs_cmd_deadman_t)); + ncmd = _IOWR('Z', request, struct zfs_cmd_deadman); + break; case ZFS_CMD_COMPAT_V28: zc_c = malloc(sizeof(zfs_cmd_v28_t)); - ncmd = _IOWR('Z', ZFS_IOC(cmd), struct zfs_cmd_v28); + ncmd = _IOWR('Z', request, struct zfs_cmd_v28); break; case ZFS_CMD_COMPAT_V15: - nc = zfs_ioctl_v28_to_v15[ZFS_IOC(cmd)]; + nc = zfs_ioctl_v28_to_v15[request]; zc_c = malloc(sizeof(zfs_cmd_v15_t)); ncmd = _IOWR('Z', nc, struct zfs_cmd_v15); break; @@ -499,24 +606,25 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag) return (EINVAL); } - if (ZFS_IOC(ncmd) == ZFS_IOC_COMPAT_FAIL) + if (ZFS_IOCREQ(ncmd) == ZFS_IOC_COMPAT_FAIL) return (ENOTSUP); - zfs_cmd_compat_put(zc, (caddr_t)zc_c, cflag); + zfs_cmd_compat_put(zc, (caddr_t)zc_c, request, cflag); + ret = ioctl(fd, ncmd, zc_c); if (cflag == ZFS_CMD_COMPAT_V15 && - nc == 2 /* ZFS_IOC_POOL_IMPORT */) - ret = ioctl(fd, _IOWR('Z', 4 /* ZFS_IOC_POOL_CONFIGS */, + nc == ZFS_IOC_POOL_IMPORT) + ret = ioctl(fd, _IOWR('Z', ZFS_IOC_POOL_CONFIGS, struct zfs_cmd_v15), zc_c); zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag); free(zc_c); if (cflag == ZFS_CMD_COMPAT_V15) { switch (nc) { - case 2: /* ZFS_IOC_POOL_IMPORT */ - case 4: /* ZFS_IOC_POOL_CONFIGS */ - case 5: /* ZFS_IOC_POOL_STATS */ - case 6: /* ZFS_IOC_POOL_TRYIMPORT */ + case ZFS_IOC_POOL_IMPORT: + case ZFS_IOC_POOL_CONFIGS: + case ZFS_IOC_POOL_STATS: + case ZFS_IOC_POOL_TRYIMPORT: zfs_ioctl_compat_fix_stats(zc, nc); break; case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ @@ -528,16 +636,25 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag) return (ret); } #else /* _KERNEL */ -void +int zfs_ioctl_compat_pre(zfs_cmd_t *zc, int *vec, const int cflag) { - if (cflag == ZFS_CMD_COMPAT_V15) + int error = 0; + + /* are we creating a clone? */ + if (*vec == ZFS_IOC_CREATE && zc->zc_value[0] != '\0') + *vec = ZFS_IOC_CLONE; + + if (cflag == ZFS_CMD_COMPAT_V15) { switch (*vec) { case 7: /* ZFS_IOC_POOL_SCRUB (v15) */ zc->zc_cookie = POOL_SCAN_SCRUB; break; } + } + + return (error); } void @@ -545,9 +662,9 @@ zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag) { if (cflag == ZFS_CMD_COMPAT_V15) { switch (vec) { - case 4: /* ZFS_IOC_POOL_CONFIGS */ - case 5: /* ZFS_IOC_POOL_STATS */ - case 6: /* ZFS_IOC_POOL_TRYIMPORT */ + case ZFS_IOC_POOL_CONFIGS: + case ZFS_IOC_POOL_STATS: + case ZFS_IOC_POOL_TRYIMPORT: zfs_ioctl_compat_fix_stats(zc, vec); break; case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ @@ -556,4 +673,193 @@ zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag) } } } + +nvlist_t * +zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec, + const int cflag) +{ + nvlist_t *nvl, *tmpnvl, *hnvl; + nvpair_t *elem; + char *poolname, *snapname; + int err; + + if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC) + goto out; + + switch (vec) { + case ZFS_IOC_CREATE: + nvl = fnvlist_alloc(); + fnvlist_add_int32(nvl, "type", zc->zc_objset_type); + if (innvl != NULL) { + fnvlist_add_nvlist(nvl, "props", innvl); + nvlist_free(innvl); + } + return (nvl); + break; + case ZFS_IOC_CLONE: + nvl = fnvlist_alloc(); + fnvlist_add_string(nvl, "origin", zc->zc_value); + if (innvl != NULL) { + fnvlist_add_nvlist(nvl, "props", innvl); + nvlist_free(innvl); + } + return (nvl); + break; + case ZFS_IOC_SNAPSHOT: + if (innvl == NULL) + goto out; + nvl = fnvlist_alloc(); + fnvlist_add_nvlist(nvl, "props", innvl); + tmpnvl = fnvlist_alloc(); + snapname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); + fnvlist_add_boolean(tmpnvl, snapname); + kmem_free(snapname, strlen(snapname + 1)); + /* check if we are doing a recursive snapshot */ + if (zc->zc_cookie) + dmu_get_recursive_snaps_nvl(zc->zc_name, zc->zc_value, + tmpnvl); + fnvlist_add_nvlist(nvl, "snaps", tmpnvl); + fnvlist_free(tmpnvl); + nvlist_free(innvl); + /* strip dataset part from zc->zc_name */ + zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; + return (nvl); + break; + case ZFS_IOC_SPACE_SNAPS: + nvl = fnvlist_alloc(); + fnvlist_add_string(nvl, "firstsnap", zc->zc_value); + if (innvl != NULL) + nvlist_free(innvl); + return (nvl); + break; + case ZFS_IOC_DESTROY_SNAPS: + if (innvl == NULL && cflag == ZFS_CMD_COMPAT_DEADMAN) + goto out; + nvl = fnvlist_alloc(); + if (innvl != NULL) { + fnvlist_add_nvlist(nvl, "snaps", innvl); + } else { + /* + * We are probably called by even older binaries, + * allocate and populate nvlist with recursive + * snapshots + */ + if (snapshot_namecheck(zc->zc_value, NULL, + NULL) == 0) { + tmpnvl = fnvlist_alloc(); + if (dmu_get_recursive_snaps_nvl(zc->zc_name, + zc->zc_value, tmpnvl) == 0) + fnvlist_add_nvlist(nvl, "snaps", + tmpnvl); + nvlist_free(tmpnvl); + } + } + if (innvl != NULL) + nvlist_free(innvl); + /* strip dataset part from zc->zc_name */ + zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; + return (nvl); + break; + case ZFS_IOC_HOLD: + nvl = fnvlist_alloc(); + tmpnvl = fnvlist_alloc(); + if (zc->zc_cleanup_fd != -1) + fnvlist_add_int32(nvl, "cleanup_fd", + (int32_t)zc->zc_cleanup_fd); + if (zc->zc_cookie) { + hnvl = fnvlist_alloc(); + if (dmu_get_recursive_snaps_nvl(zc->zc_name, + zc->zc_value, hnvl) == 0) { + elem = NULL; + while ((elem = nvlist_next_nvpair(hnvl, + elem)) != NULL) { + nvlist_add_string(tmpnvl, + nvpair_name(elem), zc->zc_string); + } + } + nvlist_free(hnvl); + } else { + snapname = kmem_asprintf("%s@%s", zc->zc_name, + zc->zc_value); + nvlist_add_string(tmpnvl, snapname, zc->zc_string); + kmem_free(snapname, strlen(snapname + 1)); + } + fnvlist_add_nvlist(nvl, "holds", tmpnvl); + nvlist_free(tmpnvl); + if (innvl != NULL) + nvlist_free(innvl); + /* strip dataset part from zc->zc_name */ + zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; + return (nvl); + break; + case ZFS_IOC_RELEASE: + nvl = fnvlist_alloc(); + tmpnvl = fnvlist_alloc(); + if (zc->zc_cookie) { + hnvl = fnvlist_alloc(); + if (dmu_get_recursive_snaps_nvl(zc->zc_name, + zc->zc_value, hnvl) == 0) { + elem = NULL; + while ((elem = nvlist_next_nvpair(hnvl, + elem)) != NULL) { + fnvlist_add_boolean(tmpnvl, + zc->zc_string); + fnvlist_add_nvlist(nvl, + nvpair_name(elem), tmpnvl); + } + } + nvlist_free(hnvl); + } else { + snapname = kmem_asprintf("%s@%s", zc->zc_name, + zc->zc_value); + fnvlist_add_boolean(tmpnvl, zc->zc_string); + fnvlist_add_nvlist(nvl, snapname, tmpnvl); + kmem_free(snapname, strlen(snapname + 1)); + } + nvlist_free(tmpnvl); + if (innvl != NULL) + nvlist_free(innvl); + /* strip dataset part from zc->zc_name */ + zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0'; + return (nvl); + break; + } +out: + return (innvl); +} + +nvlist_t * +zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec, + const int cflag) +{ + nvlist_t *tmpnvl; + + if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC) + return (outnvl); + + switch (vec) { + case ZFS_IOC_SPACE_SNAPS: + (void) nvlist_lookup_uint64(outnvl, "used", &zc->zc_cookie); + (void) nvlist_lookup_uint64(outnvl, "compressed", + &zc->zc_objset_type); + (void) nvlist_lookup_uint64(outnvl, "uncompressed", + &zc->zc_perm_action); + nvlist_free(outnvl); + /* return empty outnvl */ + tmpnvl = fnvlist_alloc(); + return (tmpnvl); + break; + case ZFS_IOC_CREATE: + case ZFS_IOC_CLONE: + case ZFS_IOC_HOLD: + case ZFS_IOC_RELEASE: + nvlist_free(outnvl); + /* return empty outnvl */ + tmpnvl = fnvlist_alloc(); + return (tmpnvl); + break; + } + + return (outnvl); +} #endif /* KERNEL */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h index b20cecafc..6e8d51d37 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h @@ -45,17 +45,31 @@ extern "C" { */ /* ioctl versions for vfs.zfs.version.ioctl */ +#define ZFS_IOCVER_UNDEF -1 +#define ZFS_IOCVER_NONE 0 #define ZFS_IOCVER_DEADMAN 1 -#define ZFS_IOCVER_CURRENT ZFS_IOCVER_DEADMAN +#define ZFS_IOCVER_LZC 2 +#define ZFS_IOCVER_ZCMD 3 +#define ZFS_IOCVER_CURRENT ZFS_IOCVER_ZCMD /* compatibility conversion flag */ #define ZFS_CMD_COMPAT_NONE 0 #define ZFS_CMD_COMPAT_V15 1 #define ZFS_CMD_COMPAT_V28 2 +#define ZFS_CMD_COMPAT_DEADMAN 3 +#define ZFS_CMD_COMPAT_LZC 4 #define ZFS_IOC_COMPAT_PASS 254 #define ZFS_IOC_COMPAT_FAIL 255 +#define ZFS_IOCREQ(ioreq) ((ioreq) & 0xff) + +typedef struct zfs_iocparm { + uint32_t zfs_ioctl_version; + uint64_t zfs_cmd; + uint64_t zfs_cmd_size; +} zfs_iocparm_t; + typedef struct zinject_record_v15 { uint64_t zi_objset; uint64_t zi_object; @@ -148,6 +162,44 @@ typedef struct zfs_cmd_v28 { zfs_stat_t zc_stat; } zfs_cmd_v28_t; +typedef struct zfs_cmd_deadman { + char zc_name[MAXPATHLEN]; + char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; + char zc_top_ds[MAXPATHLEN]; + uint64_t zc_guid; + uint64_t zc_nvlist_conf; /* really (char *) */ + uint64_t zc_nvlist_conf_size; + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + uint64_t zc_cookie; + uint64_t zc_objset_type; + uint64_t zc_perm_action; + uint64_t zc_history; /* really (char *) */ + uint64_t zc_history_len; + uint64_t zc_history_offset; + uint64_t zc_obj; + uint64_t zc_iflags; /* internal to zfs(7fs) */ + zfs_share_t zc_share; + uint64_t zc_jailid; + dmu_objset_stats_t zc_objset_stats; + struct drr_begin zc_begin_record; + /* zc_inject_record doesn't change in libzfs_core */ + zinject_record_t zc_inject_record; + boolean_t zc_defer_destroy; + boolean_t zc_temphold; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_simple; + uint8_t zc_pad[3]; /* alignment */ + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; + zfs_stat_t zc_stat; +} zfs_cmd_deadman_t; + #ifdef _KERNEL unsigned static long zfs_ioctl_v15_to_v28[] = { 0, /* 0 ZFS_IOC_POOL_CREATE */ @@ -272,13 +324,17 @@ unsigned static long zfs_ioctl_v28_to_v15[] = { #endif /* ! _KERNEL */ #ifdef _KERNEL -void zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int); +int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int); void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int); +nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int, + const int); +nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int, + const int); #else -int zcmd_ioctl_compat(int, unsigned long, zfs_cmd_t *, const int); +int zcmd_ioctl_compat(int, int, zfs_cmd_t *, const int); #endif /* _KERNEL */ void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int); -void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int); +void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int); #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c b/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c index 4d7e79c0a..ca2e72c5d 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zprop_common.c @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ /* * Common routines used by zfs and zpool property management. @@ -129,7 +132,8 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type, zprop_attr_t attr, int objset_types, const char *colname) { zprop_register_impl(prop, name, type, 0, NULL, attr, - objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); + objset_types, NULL, colname, + type == PROP_TYPE_NUMBER, B_FALSE, NULL); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files index 40009e3ff..d026de8b6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/Makefile.files +++ b/sys/cddl/contrib/opensolaris/uts/common/Makefile.files @@ -49,8 +49,10 @@ ZFS_COMMON_OBJS += \ dsl_dir.o \ dsl_dataset.o \ dsl_deadlist.o \ + dsl_destroy.o \ dsl_pool.o \ dsl_synctask.o \ + dsl_userhold.o \ dmu_zfetch.o \ dsl_deleg.o \ dsl_prop.o \ @@ -61,6 +63,7 @@ ZFS_COMMON_OBJS += \ lzjb.o \ metaslab.o \ refcount.o \ + rrwlock.o \ sa.o \ sha256.o \ spa.o \ @@ -120,7 +123,6 @@ ZFS_OBJS += \ zfs_onexit.o \ zfs_replay.o \ zfs_rlock.o \ - rrwlock.o \ zfs_vfsops.o \ zfs_vnops.o \ zvol.o diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index a7db20998..8d8d7d06b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -1787,12 +1787,12 @@ arc_buf_free(arc_buf_t *buf, void *tag) } } -int +boolean_t arc_buf_remove_ref(arc_buf_t *buf, void* tag) { arc_buf_hdr_t *hdr = buf->b_hdr; kmutex_t *hash_lock = HDR_LOCK(hdr); - int no_callback = (buf->b_efunc == NULL); + boolean_t no_callback = (buf->b_efunc == NULL); if (hdr->b_state == arc_anon) { ASSERT(hdr->b_datacnt == 1); @@ -2042,7 +2042,7 @@ evict_start: ARCSTAT_INCR(arcstat_mutex_miss, missed); /* - * We have just evicted some date into the ghost state, make + * We have just evicted some data into the ghost state, make * sure we also adjust the ghost state size if necessary. */ if (arc_no_grow && @@ -2875,7 +2875,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) { if (zio == NULL || zio->io_error == 0) bcopy(buf->b_data, arg, buf->b_hdr->b_size); - VERIFY(arc_buf_remove_ref(buf, arg) == 1); + VERIFY(arc_buf_remove_ref(buf, arg)); } /* a generic arc_done_func_t */ @@ -2884,7 +2884,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; if (zio && zio->io_error) { - VERIFY(arc_buf_remove_ref(buf, arg) == 1); + VERIFY(arc_buf_remove_ref(buf, arg)); *bufp = NULL; } else { *bufp = buf; @@ -3733,14 +3733,14 @@ arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg) */ if (curproc == pageproc) { if (page_load > available_memory / 4) - return (ERESTART); + return (SET_ERROR(ERESTART)); /* Note: reserve is inflated, so we deflate */ page_load += reserve / 8; return (0); } else if (page_load > 0 && arc_reclaim_needed()) { /* memory is low, delay before restarting */ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } page_load = 0; @@ -3755,7 +3755,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg) if (inflight_data > available_memory / 4) { ARCSTAT_INCR(arcstat_memory_throttle_count, 1); - return (ERESTART); + return (SET_ERROR(ERESTART)); } #endif return (0); @@ -3780,13 +3780,13 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) */ if (spa_get_random(10000) == 0) { dprintf("forcing random failure\n"); - return (ERESTART); + return (SET_ERROR(ERESTART)); } #endif if (reserve > arc_c/4 && !arc_no_grow) arc_c = MIN(arc_c_max, reserve * 4); if (reserve > arc_c) - return (ENOMEM); + return (SET_ERROR(ENOMEM)); /* * Don't count loaned bufs as in flight dirty data to prevent long @@ -3819,7 +3819,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg) arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10, arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10, reserve>>10, arc_c>>10); - return (ERESTART); + return (SET_ERROR(ERESTART)); } atomic_add_64(&arc_tempreserve, reserve); return (0); @@ -4511,7 +4511,7 @@ l2arc_read_done(zio_t *zio) if (zio->io_error != 0) { ARCSTAT_BUMP(arcstat_l2_io_error); } else { - zio->io_error = EIO; + zio->io_error = SET_ERROR(EIO); } if (!equal) ARCSTAT_BUMP(arcstat_l2_cksum_bad); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c index 066ccc6b1..ee12db3a2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp) mutex_exit(&bpl->bpl_lock); } +/* + * To aid debugging, we keep the most recently removed entry. This way if + * we are in the callback, we can easily locate the entry. + */ +static bplist_entry_t *bplist_iterate_last_removed; + void bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) { @@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) mutex_enter(&bpl->bpl_lock); while (bpe = list_head(&bpl->bpl_list)) { + bplist_iterate_last_removed = bpe; list_remove(&bpl->bpl_list, bpe); mutex_exit(&bpl->bpl_lock); func(arg, &bpe->bpe_blk, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c index 3e55663c9..1b9baa779 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c @@ -392,6 +392,10 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); } + dmu_object_info_t doi; + ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi)); + ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); + mutex_enter(&bpo->bpo_lock); dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c index 2dc614745..44aca76c7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -39,7 +39,7 @@ #include static void dbuf_destroy(dmu_buf_impl_t *db); -static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); /* @@ -499,7 +499,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) } else { ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT3P(db->db_buf, ==, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); db->db_state = DB_UNCACHED; } cv_broadcast(&db->db_changed); @@ -598,7 +598,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) ASSERT(!refcount_is_zero(&db->db_holds)); if (db->db_state == DB_NOFILL) - return (EIO); + return (SET_ERROR(EIO)); DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -655,7 +655,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) cv_wait(&db->db_changed, &db->db_mtx); } if (db->db_state == DB_UNCACHED) - err = EIO; + err = SET_ERROR(EIO); } mutex_exit(&db->db_mtx); } @@ -828,10 +828,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) continue; /* found a level 0 buffer in the range */ - if (dbuf_undirty(db, tx)) + mutex_enter(&db->db_mtx); + if (dbuf_undirty(db, tx)) { + /* mutex has been dropped and dbuf destroyed */ continue; + } - mutex_enter(&db->db_mtx); if (db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL || db->db_state == DB_EVICTING) { @@ -958,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) mutex_enter(&db->db_mtx); dbuf_set_data(db, buf); - VERIFY(arc_buf_remove_ref(obuf, db) == 1); + VERIFY(arc_buf_remove_ref(obuf, db)); db->db.db_size = size; if (db->db_level == 0) { @@ -1258,7 +1260,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr); } -static int +/* + * Return TRUE if this evicted the dbuf. + */ +static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { dnode_t *dn; @@ -1267,18 +1272,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(txg != 0); ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT0(db->db_level); + ASSERT(MUTEX_HELD(&db->db_mtx)); - mutex_enter(&db->db_mtx); /* * If this buffer is not dirty, we're done. */ for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) if (dr->dr_txg <= txg) break; - if (dr == NULL || dr->dr_txg < txg) { - mutex_exit(&db->db_mtx); - return (0); - } + if (dr == NULL || dr->dr_txg < txg) + return (B_FALSE); ASSERT(dr->dr_txg == txg); ASSERT(dr->dr_dbuf == db); @@ -1286,24 +1290,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dn = DB_DNODE(db); /* - * If this buffer is currently held, we cannot undirty - * it, since one of the current holders may be in the - * middle of an update. Note that users of dbuf_undirty() - * should not place a hold on the dbuf before the call. - * Also note: we can get here with a spill block, so - * test for that similar to how dbuf_dirty does. + * Note: This code will probably work even if there are concurrent + * holders, but it is untested in that scenerio, as the ZPL and + * ztest have additional locking (the range locks) that prevents + * that type of concurrent access. */ - if (refcount_count(&db->db_holds) > db->db_dirtycnt) { - mutex_exit(&db->db_mtx); - /* Make sure we don't toss this buffer at sync phase */ - if (db->db_blkid != DMU_SPILL_BLKID) { - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); - } - DB_DNODE_EXIT(db); - return (0); - } + ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt); dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); @@ -1332,21 +1324,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } DB_DNODE_EXIT(db); - if (db->db_level == 0) { - if (db->db_state != DB_NOFILL) { - dbuf_unoverride(dr); + if (db->db_state != DB_NOFILL) { + dbuf_unoverride(dr); - ASSERT(db->db_buf != NULL); - ASSERT(dr->dt.dl.dr_data != NULL); - if (dr->dt.dl.dr_data != db->db_buf) - VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, - db) == 1); - } - } else { ASSERT(db->db_buf != NULL); - ASSERT(list_head(&dr->dt.di.dr_children) == NULL); - mutex_destroy(&dr->dt.di.dr_mtx); - list_destroy(&dr->dt.di.dr_children); + ASSERT(dr->dt.dl.dr_data != NULL); + if (dr->dt.dl.dr_data != db->db_buf) + VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db)); } kmem_free(dr, sizeof (dbuf_dirty_record_t)); @@ -1358,13 +1342,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); dbuf_set_data(db, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); - return (1); + return (B_TRUE); } - mutex_exit(&db->db_mtx); - return (0); + return (B_FALSE); } #pragma weak dmu_buf_will_dirty = dbuf_will_dirty @@ -1463,7 +1446,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); xuio_stat_wbuf_copied(); return; } @@ -1481,10 +1464,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_release(db->db_buf, db); } dr->dt.dl.dr_data = buf; - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + VERIFY(arc_buf_remove_ref(db->db_buf, db)); } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { arc_release(db->db_buf, db); - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + VERIFY(arc_buf_remove_ref(db->db_buf, db)); } db->db_buf = NULL; } @@ -1610,7 +1593,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, if (level >= nlevels || (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { /* the buffer has no parent yet */ - return (ENOENT); + return (SET_ERROR(ENOENT)); } else if (level < nlevels-1) { /* this block is referenced from an indirect block */ int err = dbuf_hold_impl(dn, level+1, @@ -1861,7 +1844,7 @@ top: err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); if (fail_sparse) { if (err == 0 && bp && BP_IS_HOLE(bp)) - err = ENOENT; + err = SET_ERROR(ENOENT); if (err) { if (parent) dbuf_rele(parent, NULL); @@ -1958,7 +1941,7 @@ dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) dnode_t *dn; if (db->db_blkid != DMU_SPILL_BLKID) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (blksz == 0) blksz = SPA_MINBLOCKSIZE; if (blksz > SPA_MAXBLOCKSIZE) @@ -2067,10 +2050,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * This dbuf has anonymous data associated with it. */ dbuf_set_data(db, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); } else { - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); + VERIFY(!arc_buf_remove_ref(db->db_buf, db)); /* * A dbuf will be eligible for eviction if either the @@ -2571,7 +2554,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_state != DB_NOFILL) { if (dr->dt.dl.dr_data != db->db_buf) VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, - db) == 1); + db)); else if (!arc_released(db->db_buf)) arc_set_callback(db->db_buf, dbuf_do_evict, db); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c index 885af958d..f6ab09494 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -174,7 +174,7 @@ ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ddt_entry_t *dde) { if (!ddt_object_exists(ddt, type, class)) - return (ENOENT); + return (SET_ERROR(ENOENT)); return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, ddt->ddt_object[type][class], dde)); @@ -235,7 +235,7 @@ ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, dmu_object_info_t *doi) { if (!ddt_object_exists(ddt, type, class)) - return (ENOENT); + return (SET_ERROR(ENOENT)); return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], doi)); @@ -1157,5 +1157,5 @@ ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) ddb->ddb_type = 0; } while (++ddb->ddb_class < DDT_CLASSES); - return (ENOENT); + return (SET_ERROR(ENOENT)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 60c5c7f39..c5654ad4e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -146,7 +146,7 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, db = dbuf_hold(dn, blkid, tag); rw_exit(&dn->dn_struct_rwlock); if (db == NULL) { - err = EIO; + err = SET_ERROR(EIO); } else { err = dbuf_read(db, NULL, db_flags); if (err) { @@ -177,9 +177,9 @@ dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx) dn = DB_DNODE(db); if (dn->dn_bonus != db) { - error = EINVAL; + error = SET_ERROR(EINVAL); } else if (newsize < 0 || newsize > db_fake->db_size) { - error = EINVAL; + error = SET_ERROR(EINVAL); } else { dnode_setbonuslen(dn, newsize, tx); error = 0; @@ -200,9 +200,9 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) dn = DB_DNODE(db); if (!DMU_OT_IS_VALID(type)) { - error = EINVAL; + error = SET_ERROR(EINVAL); } else if (dn->dn_bonus != db) { - error = EINVAL; + error = SET_ERROR(EINVAL); } else { dnode_setbonus_type(dn, type, tx); error = 0; @@ -329,12 +329,12 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) dn = DB_DNODE(db); if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) { - err = EINVAL; + err = SET_ERROR(EINVAL); } else { rw_enter(&dn->dn_struct_rwlock, RW_READER); if (!dn->dn_have_spill) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { err = dmu_spill_hold_by_dnode(dn, DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp); @@ -400,7 +400,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, (longlong_t)dn->dn_object, dn->dn_datablksz, (longlong_t)offset, (longlong_t)length); rw_exit(&dn->dn_struct_rwlock); - return (EIO); + return (SET_ERROR(EIO)); } nblks = 1; } @@ -417,7 +417,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, rw_exit(&dn->dn_struct_rwlock); dmu_buf_rele_array(dbp, nblks, tag); zio_nowait(zio); - return (EIO); + return (SET_ERROR(EIO)); } /* initiate async i/o */ if (read) @@ -449,7 +449,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); if (db->db_state == DB_UNCACHED) - err = EIO; + err = SET_ERROR(EIO); mutex_exit(&db->db_mtx); if (err) { dmu_buf_rele_array(dbp, nblks, tag); @@ -1204,7 +1204,7 @@ void dmu_return_arcbuf(arc_buf_t *buf) { arc_return_buf(buf, FTAG); - VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); + VERIFY(arc_buf_remove_ref(buf, FTAG)); } /* @@ -1363,7 +1363,8 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, dmu_tx_hold_space(tx, zgd->zgd_db->db_size); if (dmu_tx_assign(tx, TXG_WAIT) != 0) { dmu_tx_abort(tx); - return (EIO); /* Make zl_get_data do txg_waited_synced() */ + /* Make zl_get_data do txg_waited_synced() */ + return (SET_ERROR(EIO)); } dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); @@ -1448,7 +1449,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) * This txg has already synced. There's nothing to do. */ mutex_exit(&db->db_mtx); - return (EEXIST); + return (SET_ERROR(EEXIST)); } if (txg <= spa_syncing_txg(os->os_spa)) { @@ -1470,7 +1471,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) * There's no need to log writes to freed blocks, so we're done. */ mutex_exit(&db->db_mtx); - return (ENOENT); + return (SET_ERROR(ENOENT)); } ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg); @@ -1499,7 +1500,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) * have been dirtied since, or we would have cleared the state. */ mutex_exit(&db->db_mtx); - return (EALREADY); + return (SET_ERROR(EALREADY)); } ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c index e028a1077..945641b4b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -135,7 +136,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) - return (EINTR); + return (SET_ERROR(EINTR)); if (zb->zb_object != DMU_META_DNODE_OBJECT) return (0); @@ -158,7 +159,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + return (SET_ERROR(EIO)); blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { @@ -178,51 +179,53 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } int -dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp) +dmu_diff(const char *tosnap_name, const char *fromsnap_name, +#ifdef illumos + struct vnode *vp, offset_t *offp) +#else + struct file *fp, offset_t *offp) +#endif { struct diffarg da; - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap->os_dsl_dataset; - dsl_dataset_t *findds; - dsl_dataset_t *relds; - int err = 0; - - /* make certain we are looking at snapshots */ - if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds)) - return (EINVAL); - - /* fromsnap must be earlier and from the same lineage as tosnap */ - if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg) - return (EXDEV); - - relds = NULL; - findds = ds; - - while (fromds->ds_dir != findds->ds_dir) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (!dsl_dir_is_clone(findds->ds_dir)) { - if (relds) - dsl_dataset_rele(relds, FTAG); - return (EXDEV); - } - - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds); - rw_exit(&dp->dp_config_rwlock); - - if (relds) - dsl_dataset_rele(relds, FTAG); + dsl_dataset_t *fromsnap; + dsl_dataset_t *tosnap; + dsl_pool_t *dp; + int error; + uint64_t fromtxg; + + if (strchr(tosnap_name, '@') == NULL || + strchr(fromsnap_name, '@') == NULL) + return (SET_ERROR(EINVAL)); + + error = dsl_pool_hold(tosnap_name, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } - if (err) - return (EXDEV); + error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (error); + } - relds = findds; + if (!dsl_dataset_is_before(tosnap, fromsnap)) { + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (SET_ERROR(EXDEV)); } - if (relds) - dsl_dataset_rele(relds, FTAG); + fromtxg = fromsnap->ds_phys->ds_creation_txg; + dsl_dataset_rele(fromsnap, FTAG); + + dsl_dataset_long_hold(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); da.da_fp = fp; da.da_offp = offp; @@ -231,15 +234,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp) da.da_err = 0; da.da_td = curthread; - err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg, + error = traverse_dataset(tosnap, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da); - if (err) { - da.da_err = err; + if (error != 0) { + da.da_err = error; } else { /* we set the da.da_err we return as side-effect */ (void) write_record(&da); } + dsl_dataset_long_rele(tosnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + return (da.da_err); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c index 8dff46048..11561f27c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -90,7 +91,7 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int err; if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) - return (EBADF); + return (SET_ERROR(EBADF)); err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); if (err) @@ -112,7 +113,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int err; if (object == DMU_META_DNODE_OBJECT) - return (EBADF); + return (SET_ERROR(EBADF)); err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, FTAG, &dn); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c index 3cc3c86d6..4f8c265ab 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -43,6 +44,7 @@ #include #include #include +#include /* * Needed to close a window in dnode_move() that allows the objset to be freed @@ -279,11 +281,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); - if (err) { + if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) - err = EIO; + err = SET_ERROR(EIO); return (err); } @@ -319,34 +321,49 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * checksum/compression/copies. */ if (ds) { - err = dsl_prop_register(ds, "primarycache", + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "secondarycache", + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); + } if (!dsl_dataset_is_snapshot(ds)) { - if (err == 0) - err = dsl_prop_register(ds, "checksum", + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "compression", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "copies", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "dedup", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "logbias", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "sync", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os); + } } - if (err) { + if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, - &os->os_phys_buf) == 1); + &os->os_phys_buf)); kmem_free(os, sizeof (objset_t)); return (err); } @@ -424,45 +441,67 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) return (err); } -/* called from zpl */ +/* + * Holds the pool while the objset is held. Therefore only one objset + * can be held at a time. + */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp) { + dsl_pool_t *dp; dsl_dataset_t *ds; int err; - err = dsl_dataset_hold(name, tag, &ds); - if (err) + err = dsl_pool_hold(name, tag, &dp); + if (err != 0) return (err); + err = dsl_dataset_hold(dp, name, tag, &ds); + if (err != 0) { + dsl_pool_rele(dp, tag); + return (err); + } err = dmu_objset_from_ds(ds, osp); - if (err) + if (err != 0) { dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + } return (err); } -/* called from zpl */ +/* + * dsl_pool must not be held when this is called. + * Upon successful return, there will be a longhold on the dataset, + * and the dsl_pool will not be held. + */ int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp) { + dsl_pool_t *dp; dsl_dataset_t *ds; int err; - err = dsl_dataset_own(name, B_FALSE, tag, &ds); - if (err) + err = dsl_pool_hold(name, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_own(dp, name, tag, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); return (err); + } err = dmu_objset_from_ds(ds, osp); - if (err) { + dsl_pool_rele(dp, FTAG); + if (err != 0) { dsl_dataset_disown(ds, tag); } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { - dmu_objset_disown(*osp, tag); - return (EINVAL); + dsl_dataset_disown(ds, tag); + return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { - dmu_objset_disown(*osp, tag); - return (EROFS); + dsl_dataset_disown(ds, tag); + return (SET_ERROR(EROFS)); } return (err); } @@ -470,7 +509,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type, void dmu_objset_rele(objset_t *os, void *tag) { + dsl_pool_t *dp = dmu_objset_pool(os); dsl_dataset_rele(os->os_dsl_dataset, tag); + dsl_pool_rele(dp, tag); } void @@ -479,7 +520,7 @@ dmu_objset_disown(objset_t *os, void *tag) dsl_dataset_disown(os->os_dsl_dataset, tag); } -int +void dmu_objset_evict_dbufs(objset_t *os) { dnode_t *dn; @@ -514,9 +555,7 @@ dmu_objset_evict_dbufs(objset_t *os) mutex_enter(&os->os_lock); dn = next_dn; } - dn = list_head(&os->os_dnodes); mutex_exit(&os->os_lock); - return (dn != DMU_META_DNODE(os)); } void @@ -529,33 +568,37 @@ dmu_objset_evict(objset_t *os) if (ds) { if (!dsl_dataset_is_snapshot(ds)) { - VERIFY(0 == dsl_prop_unregister(ds, "checksum", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "compression", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "copies", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "dedup", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "logbias", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "sync", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os)); } - VERIFY(0 == dsl_prop_unregister(ds, "primarycache", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os)); } if (os->os_sa) sa_tear_down(os); - /* - * We should need only a single pass over the dnode list, since - * nothing can be added to the list at this point. - */ - (void) dmu_objset_evict_dbufs(os); + dmu_objset_evict_dbufs(os); dnode_special_close(&os->os_meta_dnode); if (DMU_USERUSED_DNODE(os)) { @@ -566,7 +609,7 @@ dmu_objset_evict(objset_t *os) ASSERT3P(list_head(&os->os_dnodes), ==, NULL); - VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); + VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); /* * This is a barrier to prevent the objset from going away in @@ -598,10 +641,11 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); + if (ds != NULL) - VERIFY(0 == dmu_objset_from_ds(ds, &os)); + VERIFY0(dmu_objset_from_ds(ds, &os)); else - VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os)); + VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os)); mdn = DMU_META_DNODE(os); @@ -649,371 +693,194 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, return (os); } -struct oscarg { - void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - void *userarg; - dsl_dataset_t *clone_origin; - const char *lastname; - dmu_objset_type_t type; - uint64_t flags; - cred_t *cr; -}; +typedef struct dmu_objset_create_arg { + const char *doca_name; + cred_t *doca_cred; + void (*doca_userfunc)(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + void *doca_userarg; + dmu_objset_type_t doca_type; + uint64_t doca_flags; +} dmu_objset_create_arg_t; /*ARGSUSED*/ static int -dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_objset_create_check(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - struct oscarg *oa = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; - int err; - uint64_t ddobj; - - err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, - oa->lastname, sizeof (uint64_t), 1, &ddobj); - if (err != ENOENT) - return (err ? err : EEXIST); + dmu_objset_create_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; + int error; - if (oa->clone_origin != NULL) { - /* You can't clone across pools. */ - if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) - return (EXDEV); + if (strchr(doca->doca_name, '@') != NULL) + return (SET_ERROR(EINVAL)); - /* You can only clone snapshots, not the head datasets. */ - if (!dsl_dataset_is_snapshot(oa->clone_origin)) - return (EINVAL); + error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail); + if (error != 0) + return (error); + if (tail == NULL) { + dsl_dir_rele(pdd, FTAG); + return (SET_ERROR(EEXIST)); } + dsl_dir_rele(pdd, FTAG); return (0); } static void -dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_objset_create_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - spa_t *spa = dd->dd_pool->dp_spa; - struct oscarg *oa = arg2; + dmu_objset_create_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; + dsl_dataset_t *ds; uint64_t obj; + blkptr_t *bp; + objset_t *os; - ASSERT(dmu_tx_is_syncing(tx)); - - obj = dsl_dataset_create_sync(dd, oa->lastname, - oa->clone_origin, oa->flags, oa->cr, tx); - - if (oa->clone_origin == NULL) { - dsl_pool_t *dp = dd->dd_pool; - dsl_dataset_t *ds; - blkptr_t *bp; - objset_t *os; + VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - ASSERT(BP_IS_HOLE(bp)); + obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, + doca->doca_cred, tx); - os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, + ds, bp, doca->doca_type, tx); - if (oa->userfunc) - oa->userfunc(os, oa->userarg, oa->cr, tx); - dsl_dataset_rele(ds, FTAG); + if (doca->doca_userfunc != NULL) { + doca->doca_userfunc(os, doca->doca_userarg, + doca->doca_cred, tx); } - spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); + spa_history_log_internal_ds(ds, "create", tx, ""); + dsl_dataset_rele(ds, FTAG); + dsl_dir_rele(pdd, FTAG); } int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) { - dsl_dir_t *pdd; - const char *tail; - int err = 0; - struct oscarg oa = { 0 }; - - ASSERT(strchr(name, '@') == NULL); - err = dsl_dir_open(name, FTAG, &pdd, &tail); - if (err) - return (err); - if (tail == NULL) { - dsl_dir_close(pdd, FTAG); - return (EEXIST); - } + dmu_objset_create_arg_t doca; - oa.userfunc = func; - oa.userarg = arg; - oa.lastname = tail; - oa.type = type; - oa.flags = flags; - oa.cr = CRED(); + doca.doca_name = name; + doca.doca_cred = CRED(); + doca.doca_flags = flags; + doca.doca_userfunc = func; + doca.doca_userarg = arg; + doca.doca_type = type; - err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, - dmu_objset_create_sync, pdd, &oa, 5); - dsl_dir_close(pdd, FTAG); - return (err); + return (dsl_sync_task(name, + dmu_objset_create_check, dmu_objset_create_sync, &doca, 5)); } -int -dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) +typedef struct dmu_objset_clone_arg { + const char *doca_clone; + const char *doca_origin; + cred_t *doca_cred; +} dmu_objset_clone_arg_t; + +/*ARGSUSED*/ +static int +dmu_objset_clone_check(void *arg, dmu_tx_t *tx) { + dmu_objset_clone_arg_t *doca = arg; dsl_dir_t *pdd; const char *tail; - int err = 0; - struct oscarg oa = { 0 }; + int error; + dsl_dataset_t *origin; + dsl_pool_t *dp = dmu_tx_pool(tx); - ASSERT(strchr(name, '@') == NULL); - err = dsl_dir_open(name, FTAG, &pdd, &tail); - if (err) - return (err); + if (strchr(doca->doca_clone, '@') != NULL) + return (SET_ERROR(EINVAL)); + + error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail); + if (error != 0) + return (error); if (tail == NULL) { - dsl_dir_close(pdd, FTAG); - return (EEXIST); + dsl_dir_rele(pdd, FTAG); + return (SET_ERROR(EEXIST)); } - - oa.lastname = tail; - oa.clone_origin = clone_origin; - oa.flags = flags; - oa.cr = CRED(); - - err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, - dmu_objset_create_sync, pdd, &oa, 5); - dsl_dir_close(pdd, FTAG); - return (err); -} - -int -dmu_objset_destroy(const char *name, boolean_t defer) -{ - dsl_dataset_t *ds; - int error; - - error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); - if (error == 0) { - error = dsl_dataset_destroy(ds, FTAG, defer); - /* dsl_dataset_destroy() closes the ds. */ + /* You can't clone across pools. */ + if (pdd->dd_pool != dp) { + dsl_dir_rele(pdd, FTAG); + return (SET_ERROR(EXDEV)); } + dsl_dir_rele(pdd, FTAG); - return (error); -} - -struct snaparg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *htag; - char failed[MAXPATHLEN]; - boolean_t recursive; - boolean_t needsuspend; - boolean_t temporary; - nvlist_t *props; - struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ - dsl_dataset_t *newds; -}; - -static int -snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - objset_t *os = arg1; - struct snaparg *sn = arg2; - int error; - - /* The props have already been checked by zfs_check_userprops(). */ - - error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - sn->snapname, tx); - if (error) + error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); + if (error != 0) return (error); - if (sn->temporary) { - /* - * Ideally we would just call - * dsl_dataset_user_hold_check() and - * dsl_dataset_destroy_check() here. However the - * dataset we want to hold and destroy is the snapshot - * that we just confirmed we can create, but it won't - * exist until after these checks are run. Do any - * checks we can here and if more checks are added to - * those routines in the future, similar checks may be - * necessary here. - */ - if (spa_version(os->os_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); - /* - * Not checking number of tags because the tag will be - * unique, as it will be the only tag. - */ - if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - return (E2BIG); + /* You can't clone across pools. */ + if (origin->ds_dir->dd_pool != dp) { + dsl_dataset_rele(origin, FTAG); + return (SET_ERROR(EXDEV)); + } - sn->ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); - sn->ha->temphold = B_TRUE; - sn->ha->htag = sn->htag; + /* You can only clone snapshots, not the head datasets. */ + if (!dsl_dataset_is_snapshot(origin)) { + dsl_dataset_rele(origin, FTAG); + return (SET_ERROR(EINVAL)); } - return (error); + dsl_dataset_rele(origin, FTAG); + + return (0); } static void -snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) { - objset_t *os = arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - struct snaparg *sn = arg2; - - dsl_dataset_snapshot_sync(ds, sn->snapname, tx); - - if (sn->props) { - dsl_props_arg_t pa; - pa.pa_props = sn->props; - pa.pa_source = ZPROP_SRC_LOCAL; - dsl_props_set_sync(ds->ds_prev, &pa, tx); - } + dmu_objset_clone_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; + dsl_dataset_t *origin, *ds; + uint64_t obj; + char namebuf[MAXNAMELEN]; - if (sn->temporary) { - struct dsl_ds_destroyarg da; + VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail)); + VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); - dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); - kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); - sn->ha = NULL; - sn->newds = ds->ds_prev; + obj = dsl_dataset_create_sync(pdd, tail, origin, 0, + doca->doca_cred, tx); - da.ds = ds->ds_prev; - da.defer = B_TRUE; - dsl_dataset_destroy_sync(&da, FTAG, tx); - } + VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + dsl_dataset_name(origin, namebuf); + spa_history_log_internal_ds(ds, "clone", tx, + "origin=%s (%llu)", namebuf, origin->ds_object); + dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); } -static int -dmu_objset_snapshot_one(const char *name, void *arg) +int +dmu_objset_clone(const char *clone, const char *origin) { - struct snaparg *sn = arg; - objset_t *os; - int err; - char *cp; - - /* - * If the objset starts with a '%', then ignore it unless it was - * explicitly named (ie, not recursive). These hidden datasets - * are always inconsistent, and by not opening them here, we can - * avoid a race with dsl_dir_destroy_check(). - */ - cp = strrchr(name, '/'); - if (cp && cp[1] == '%' && sn->recursive) - return (0); - - (void) strcpy(sn->failed, name); - - /* - * Check permissions if we are doing a recursive snapshot. The - * permission checks for the starting dataset have already been - * performed in zfs_secpolicy_snapshot() - */ - if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) - return (err); - - err = dmu_objset_hold(name, sn, &os); - if (err != 0) - return (err); - - /* - * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. As with %hidden - * datasets, we return EBUSY if this name was explicitly - * requested (ie, not recursive), and otherwise ignore it. - */ - if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, sn); - return (sn->recursive ? 0 : EBUSY); - } + dmu_objset_clone_arg_t doca; - if (sn->needsuspend) { - err = zil_suspend(dmu_objset_zil(os)); - if (err) { - dmu_objset_rele(os, sn); - return (err); - } - } - dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, - os, sn, 3); + doca.doca_clone = clone; + doca.doca_origin = origin; + doca.doca_cred = CRED(); - return (0); + return (dsl_sync_task(clone, + dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 5)); } int -dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +dmu_objset_snapshot_one(const char *fsname, const char *snapname) { - dsl_sync_task_t *dst; - struct snaparg sn; - spa_t *spa; - minor_t minor; int err; + char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); + nvlist_t *snaps = fnvlist_alloc(); - (void) strcpy(sn.failed, fsname); - - err = spa_open(fsname, &spa, FTAG); - if (err) - return (err); - - if (temporary) { - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); - } - } - - sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - sn.snapname = snapname; - sn.htag = tag; - sn.props = props; - sn.recursive = recursive; - sn.needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - sn.temporary = temporary; - sn.ha = NULL; - sn.newds = NULL; - - if (recursive) { - err = dmu_objset_find(fsname, - dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN); - } else { - err = dmu_objset_snapshot_one(fsname, &sn); - } - - if (err == 0) - err = dsl_sync_task_group_wait(sn.dstg); - - for (dst = list_head(&sn.dstg->dstg_tasks); dst; - dst = list_next(&sn.dstg->dstg_tasks, dst)) { - objset_t *os = dst->dst_arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - if (dst->dst_err) { - dsl_dataset_name(ds, sn.failed); - } else if (temporary) { - dsl_register_onexit_hold_cleanup(sn.newds, tag, minor); - } - if (sn.needsuspend) - zil_resume(dmu_objset_zil(os)); -#ifdef __FreeBSD__ -#ifdef _KERNEL - if (dst->dst_err == 0 && dmu_objset_type(os) == DMU_OST_ZVOL) { - char name[MAXNAMELEN]; - - dmu_objset_name(os, name); - strlcat(name, "@", sizeof(name)); - strlcat(name, snapname, sizeof(name)); - zvol_create_minors(name); - } -#endif -#endif - dmu_objset_rele(os, &sn); - } - - if (err) - (void) strcpy(fsname, sn.failed); - if (temporary) - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(sn.dstg); - spa_close(spa, FTAG); + fnvlist_add_boolean(snaps, longsnap); + strfree(longsnap); + err = dsl_dataset_snapshot(snaps, NULL, NULL); + fnvlist_free(snaps); return (err); } @@ -1052,9 +919,9 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; - ASSERT(bp == os->os_rootbp); - ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); - ASSERT(BP_GET_LEVEL(bp) == 0); + ASSERT3P(bp, ==, os->os_rootbp); + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); + ASSERT0(BP_GET_LEVEL(bp)); /* * Update rootbp fill count: it should be the number of objects @@ -1161,7 +1028,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; while (dr = list_head(list)) { - ASSERT(dr->dr_dbuf->db_level == 0); + ASSERT0(dr->dr_dbuf->db_level); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); @@ -1435,9 +1302,9 @@ dmu_objset_userspace_upgrade(objset_t *os) if (dmu_objset_userspace_present(os)) return (0); if (!dmu_objset_userused_enabled(os)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (dmu_objset_is_snapshot(os)) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * We simply need to mark every object dirty, so that it will be @@ -1453,15 +1320,15 @@ dmu_objset_userspace_upgrade(objset_t *os) int objerr; if (issig(JUSTLOOKING) && issig(FORREAL)) - return (EINTR); + return (SET_ERROR(EINTR)); objerr = dmu_bonus_hold(os, obj, FTAG, &db); - if (objerr) + if (objerr != 0) continue; tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, obj); objerr = dmu_tx_assign(tx, TXG_WAIT); - if (objerr) { + if (objerr != 0) { dmu_tx_abort(tx); continue; } @@ -1529,7 +1396,7 @@ dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, uint64_t ignored; if (ds->ds_phys->ds_snapnames_zapobj == 0) - return (ENOENT); + return (SET_ERROR(ENOENT)); return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, @@ -1544,8 +1411,10 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, zap_cursor_t cursor; zap_attribute_t attr; + ASSERT(dsl_pool_config_held(dmu_objset_pool(os))); + if (ds->ds_phys->ds_snapnames_zapobj == 0) - return (ENOENT); + return (SET_ERROR(ENOENT)); zap_cursor_init_serialized(&cursor, ds->ds_dir->dd_pool->dp_meta_objset, @@ -1553,12 +1422,12 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, if (zap_cursor_retrieve(&cursor, &attr) != 0) { zap_cursor_fini(&cursor); - return (ENOENT); + return (SET_ERROR(ENOENT)); } if (strlen(attr.za_name) + 1 > namelen) { zap_cursor_fini(&cursor); - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); } (void) strcpy(name, attr.za_name); @@ -1584,7 +1453,7 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, /* there is no next dir on a snapshot! */ if (os->os_dsl_dataset->ds_object != dd->dd_phys->dd_head_dataset_obj) - return (ENOENT); + return (SET_ERROR(ENOENT)); zap_cursor_init_serialized(&cursor, dd->dd_pool->dp_meta_objset, @@ -1592,12 +1461,12 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, if (zap_cursor_retrieve(&cursor, &attr) != 0) { zap_cursor_fini(&cursor); - return (ENOENT); + return (SET_ERROR(ENOENT)); } if (strlen(attr.za_name) + 1 > namelen) { zap_cursor_fini(&cursor); - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); } (void) strcpy(name, attr.za_name); @@ -1610,42 +1479,122 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, return (0); } -struct findarg { - int (*func)(const char *, void *); - void *arg; -}; - -/* ARGSUSED */ -static int -findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) -{ - struct findarg *fa = arg; - return (fa->func(dsname, fa->arg)); -} - /* - * Find all objsets under name, and for each, call 'func(child_name, arg)'. - * Perhaps change all callers to use dmu_objset_find_spa()? + * Find objsets under and including ddobj, call func(ds) on each. */ int -dmu_objset_find(const char *name, int func(const char *, void *), void *arg, - int flags) +dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj, + int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags) { - struct findarg fa; - fa.func = func; - fa.arg = arg; - return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); + dsl_dir_t *dd; + dsl_dataset_t *ds; + zap_cursor_t zc; + zap_attribute_t *attr; + uint64_t thisobj; + int err; + + ASSERT(dsl_pool_config_held(dp)); + + err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); + if (err != 0) + return (err); + + /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ + if (dd->dd_myname[0] == '$') { + dsl_dir_rele(dd, FTAG); + return (0); + } + + thisobj = dd->dd_phys->dd_head_dataset_obj; + attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + + /* + * Iterate over all children. + */ + if (flags & DS_FIND_CHILDREN) { + for (zap_cursor_init(&zc, dp->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); + + err = dmu_objset_find_dp(dp, attr->za_first_integer, + func, arg, flags); + if (err != 0) + break; + } + zap_cursor_fini(&zc); + + if (err != 0) { + dsl_dir_rele(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + return (err); + } + } + + /* + * Iterate over all snapshots. + */ + if (flags & DS_FIND_SNAPSHOTS) { + dsl_dataset_t *ds; + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + + if (err == 0) { + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + dsl_dataset_rele(ds, FTAG); + + for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); + + err = dsl_dataset_hold_obj(dp, + attr->za_first_integer, FTAG, &ds); + if (err != 0) + break; + err = func(dp, ds, arg); + dsl_dataset_rele(ds, FTAG); + if (err != 0) + break; + } + zap_cursor_fini(&zc); + } + } + + dsl_dir_rele(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + + if (err != 0) + return (err); + + /* + * Apply to self. + */ + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + if (err != 0) + return (err); + err = func(dp, ds, arg); + dsl_dataset_rele(ds, FTAG); + return (err); } /* - * Find all objsets under name, call func on each + * Find all objsets under name, and for each, call 'func(child_name, arg)'. + * The dp_config_rwlock must not be held when this is called, and it + * will not be held when the callback is called. + * Therefore this function should only be used when the pool is not changing + * (e.g. in syncing context), or the callback can deal with the possible races. */ -int -dmu_objset_find_spa(spa_t *spa, const char *name, - int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) +static int +dmu_objset_find_impl(spa_t *spa, const char *name, + int func(const char *, void *), void *arg, int flags) { dsl_dir_t *dd; - dsl_pool_t *dp; + dsl_pool_t *dp = spa_get_dsl(spa); dsl_dataset_t *ds; zap_cursor_t zc; zap_attribute_t *attr; @@ -1653,21 +1602,23 @@ dmu_objset_find_spa(spa_t *spa, const char *name, uint64_t thisobj; int err; - if (name == NULL) - name = spa_name(spa); - err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); - if (err) + dsl_pool_config_enter(dp, FTAG); + + err = dsl_dir_hold(dp, name, FTAG, &dd, NULL); + if (err != 0) { + dsl_pool_config_exit(dp, FTAG); return (err); + } /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ if (dd->dd_myname[0] == '$') { - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_config_exit(dp, FTAG); return (0); } thisobj = dd->dd_phys->dd_head_dataset_obj; attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); - dp = dd->dd_pool; /* * Iterate over all children. @@ -1677,19 +1628,24 @@ dmu_objset_find_spa(spa_t *spa, const char *name, dd->dd_phys->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { - ASSERT(attr->za_integer_length == sizeof (uint64_t)); - ASSERT(attr->za_num_integers == 1); + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); child = kmem_asprintf("%s/%s", name, attr->za_name); - err = dmu_objset_find_spa(spa, child, func, arg, flags); + dsl_pool_config_exit(dp, FTAG); + err = dmu_objset_find_impl(spa, child, + func, arg, flags); + dsl_pool_config_enter(dp, FTAG); strfree(child); - if (err) + if (err != 0) break; } zap_cursor_fini(&zc); - if (err) { - dsl_dir_close(dd, FTAG); + if (err != 0) { + dsl_dir_rele(dd, FTAG); + dsl_pool_config_exit(dp, FTAG); kmem_free(attr, sizeof (zap_attribute_t)); return (err); } @@ -1699,11 +1655,7 @@ dmu_objset_find_spa(spa_t *spa, const char *name, * Iterate over all snapshots. */ if (flags & DS_FIND_SNAPSHOTS) { - if (!dsl_pool_sync_context(dp)) - rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); - if (!dsl_pool_sync_context(dp)) - rw_exit(&dp->dp_config_rwlock); if (err == 0) { uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; @@ -1712,64 +1664,50 @@ dmu_objset_find_spa(spa_t *spa, const char *name, for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { - ASSERT(attr->za_integer_length == + ASSERT3U(attr->za_integer_length, ==, sizeof (uint64_t)); - ASSERT(attr->za_num_integers == 1); + ASSERT3U(attr->za_num_integers, ==, 1); child = kmem_asprintf("%s@%s", name, attr->za_name); - err = func(spa, attr->za_first_integer, - child, arg); + dsl_pool_config_exit(dp, FTAG); + err = func(child, arg); + dsl_pool_config_enter(dp, FTAG); strfree(child); - if (err) + if (err != 0) break; } zap_cursor_fini(&zc); } } - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); kmem_free(attr, sizeof (zap_attribute_t)); + dsl_pool_config_exit(dp, FTAG); - if (err) + if (err != 0) return (err); - /* - * Apply to self if appropriate. - */ - err = func(spa, thisobj, name, arg); - return (err); + /* Apply to self. */ + return (func(name, arg)); } -/* ARGSUSED */ +/* + * See comment above dmu_objset_find_impl(). + */ int -dmu_objset_prefetch(const char *name, void *arg) +dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags) { - dsl_dataset_t *ds; - - if (dsl_dataset_hold(name, FTAG, &ds)) - return (0); - - if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { - mutex_enter(&ds->ds_opening_lock); - if (ds->ds_objset == NULL) { - uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; - zbookmark_t zb; - - SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, - ZB_ROOT_LEVEL, ZB_ROOT_BLKID); - - (void) arc_read(NULL, dsl_dataset_get_spa(ds), - &ds->ds_phys->ds_bp, NULL, NULL, - ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, - &aflags, &zb); - } - mutex_exit(&ds->ds_opening_lock); - } + spa_t *spa; + int error; - dsl_dataset_rele(ds, FTAG); - return (0); + error = spa_open(name, &spa, FTAG); + if (error != 0) + return (error); + error = dmu_objset_find_impl(spa, name, func, arg, flags); + spa_close(spa, FTAG); + return (error); } void @@ -1785,3 +1723,19 @@ dmu_objset_get_user(objset_t *os) ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); return (os->os_user_ptr); } + +/* + * Determine name of filesystem, given name of snapshot. + * buf must be at least MAXNAMELEN bytes + */ +int +dmu_fsname(const char *snapname, char *buf) +{ + char *atp = strchr(snapname, '@'); + if (atp == NULL) + return (SET_ERROR(EINVAL)); + if (atp - snapname >= MAXNAMELEN) + return (SET_ERROR(ENAMETOOLONG)); + (void) strlcpy(buf, snapname, atp - snapname + 1); + return (0); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c index 5d5c494a6..5da0700de 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Martin Matuska . All rights reserved. */ @@ -47,11 +47,14 @@ #include #include #include +#include +#include /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ int zfs_send_corrupt_data = B_FALSE; static char *dmu_recv_tag = "dmu_recv_tag"; +static const char *recv_clone_name = "%recv"; static int dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) @@ -107,7 +110,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, dsp->dsa_pending_op != PENDING_FREE) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -131,7 +134,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, /* not a continuation. Push out pending record */ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } } @@ -145,7 +148,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, if (length == -1ULL) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); } else { dsp->dsa_pending_op = PENDING_FREE; } @@ -169,7 +172,7 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } /* write a DATA record */ @@ -189,9 +192,9 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_key.ddk_cksum = bp->blk_cksum; if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); if (dump_bytes(dsp, data, blksz) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); return (0); } @@ -203,7 +206,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -215,9 +218,9 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) drrs->drr_toguid = dsp->dsa_toguid; if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) - return (EINTR); + return (SET_ERROR(EINTR)); if (dump_bytes(dsp, data, blksz)) - return (EINTR); + return (SET_ERROR(EINTR)); return (0); } @@ -237,7 +240,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) dsp->dsa_pending_op != PENDING_FREEOBJECTS) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { @@ -252,7 +255,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) /* can't be aggregated. Push out pending record */ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } } @@ -280,7 +283,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -297,17 +300,17 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_toguid = dsp->dsa_toguid; if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) - return (EINTR); + return (SET_ERROR(EINTR)); /* free anything past the end of the file */ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) - return (EINTR); - if (dsp->dsa_err) - return (EINTR); + return (SET_ERROR(EINTR)); + if (dsp->dsa_err != 0) + return (SET_ERROR(EINTR)); return (0); } @@ -325,7 +328,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) - return (EINTR); + return (SET_ERROR(EINTR)); if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { @@ -349,14 +352,14 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + return (SET_ERROR(EIO)); blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = dump_dnode(dsp, dnobj, blk+i); - if (err) + if (err != 0) break; } (void) arc_buf_remove_ref(abuf, &abuf); @@ -368,7 +371,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) - return (EIO); + return (SET_ERROR(EIO)); err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); @@ -390,7 +393,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ptr++) *ptr = 0x2f5baddb10c; } else { - return (EIO); + return (SET_ERROR(EIO)); } } @@ -403,44 +406,38 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (err); } -int -dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - int outfd, struct file *fp, offset_t *off) +/* + * Releases dp, ds, and fromds, using the specified tag. + */ +static int +dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, +#ifdef illumos + dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off) +#else + dsl_dataset_t *fromds, int outfd, struct file *fp, offset_t *off) +#endif { - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + objset_t *os; dmu_replay_record_t *drr; dmu_sendarg_t *dsp; int err; uint64_t fromtxg = 0; - /* tosnap must be a snapshot */ - if (ds->ds_phys->ds_next_snap_obj == 0) - return (EINVAL); - - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) - return (EXDEV); - - if (fromorigin) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } + if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) { + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + return (SET_ERROR(EXDEV)); } + err = dmu_objset_from_ds(ds, &os); + if (err != 0) { + if (fromds != NULL) + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + return (err); + } drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; @@ -449,13 +446,17 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, DMU_SUBSTREAM); #ifdef _KERNEL - if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { + if (dmu_objset_type(os) == DMU_OST_ZFS) { uint64_t version; - if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { + if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { kmem_free(drr, sizeof (dmu_replay_record_t)); - return (EINVAL); + if (fromds != NULL) + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + return (SET_ERROR(EINVAL)); } - if (version == ZPL_VERSION_SA) { + if (version >= ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, DMU_BACKUP_FEATURE_SA_SPILL); @@ -465,21 +466,22 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; - drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; - if (fromorigin) + drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); + if (fromds != NULL && ds->ds_dir != fromds->ds_dir) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; - if (fromds) + if (fromds != NULL) drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); - if (fromds) + if (fromds != NULL) { fromtxg = fromds->ds_phys->ds_creation_txg; - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); + dsl_dataset_rele(fromds, tag); + fromds = NULL; + } dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -488,7 +490,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, dsp->dsa_proc = curproc; dsp->dsa_td = curthread; dsp->dsa_fp = fp; - dsp->dsa_os = tosnap; + dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = ds->ds_phys->ds_guid; ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); @@ -498,6 +500,9 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, list_insert_head(&ds->ds_sendstreams, dsp); mutex_exit(&ds->ds_sendstream_lock); + dsl_dataset_long_hold(ds, FTAG); + dsl_pool_rele(dp, tag); + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { err = dsp->dsa_err; goto out; @@ -508,10 +513,10 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (dsp->dsa_pending_op != PENDING_NONE) if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) - err = EINTR; + err = SET_ERROR(EINTR); - if (err) { - if (err == EINTR && dsp->dsa_err) + if (err != 0) { + if (err == EINTR && dsp->dsa_err != 0) err = dsp->dsa_err; goto out; } @@ -534,43 +539,105 @@ out: kmem_free(drr, sizeof (dmu_replay_record_t)); kmem_free(dsp, sizeof (dmu_sendarg_t)); + dsl_dataset_long_rele(ds, FTAG); + dsl_dataset_rele(ds, tag); + return (err); } int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - uint64_t *sizep) +dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, +#ifdef illumos + int outfd, vnode_t *vp, offset_t *off) +#else + int outfd, struct file *fp, offset_t *off) +#endif +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + dsl_dataset_t *fromds = NULL; + int err; + + err = dsl_pool_hold(pool, FTAG, &dp); + if (err != 0) + return (err); + + err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (fromsnap != 0) { + err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); + if (err != 0) { + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); + } + } + + return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off)); +} + +int +dmu_send(const char *tosnap, const char *fromsnap, +#ifdef illumos + int outfd, vnode_t *vp, offset_t *off) +#else + int outfd, struct file *fp, offset_t *off) +#endif +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + dsl_dataset_t *fromds = NULL; + int err; + + if (strchr(tosnap, '@') == NULL) + return (SET_ERROR(EINVAL)); + if (fromsnap != NULL && strchr(fromsnap, '@') == NULL) + return (SET_ERROR(EINVAL)); + + err = dsl_pool_hold(tosnap, FTAG, &dp); + if (err != 0) + return (err); + + err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (fromsnap != NULL) { + err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); + if (err != 0) { + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); + } + } + return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off)); +} + +int +dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) { - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; int err; uint64_t size; + ASSERT(dsl_pool_config_held(dp)); + /* tosnap must be a snapshot */ - if (ds->ds_phys->ds_next_snap_obj == 0) - return (EINVAL); - - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) - return (EXDEV); - - if (fromorigin) { - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } + if (!dsl_dataset_is_snapshot(ds)) + return (SET_ERROR(EINVAL)); + + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. + */ + if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) + return (SET_ERROR(EXDEV)); /* Get uncompressed size estimate of changed data. */ if (fromds == NULL) { @@ -579,9 +646,7 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, &used, &comp, &size); - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); - if (err) + if (err != 0) return (err); } @@ -601,11 +666,8 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, * block, which we observe in practice. */ uint64_t recordsize; - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_prop_get_ds(ds, "recordsize", - sizeof (recordsize), 1, &recordsize, NULL); - rw_exit(&dp->dp_config_rwlock); - if (err) + err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); + if (err != 0) return (err); size -= size / recordsize * sizeof (blkptr_t); @@ -617,116 +679,62 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, return (0); } -struct recvbeginsyncarg { - const char *tofs; - const char *tosnap; - dsl_dataset_t *origin; - uint64_t fromguid; - dmu_objset_type_t type; - void *tag; - boolean_t force; - uint64_t dsflags; - char clonelastname[MAXNAMELEN]; - dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ - cred_t *cr; -}; - -/* ARGSUSED */ -static int -recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - struct recvbeginsyncarg *rbsa = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t val; - int err; - - err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, - strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); - - if (err != ENOENT) - return (err ? err : EEXIST); - - if (rbsa->origin) { - /* make sure it's a snap in the same pool */ - if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) - return (EXDEV); - if (!dsl_dataset_is_snapshot(rbsa->origin)) - return (EINVAL); - if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) - return (ENODEV); - } - - return (0); -} - -static void -recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - struct recvbeginsyncarg *rbsa = arg2; - uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; - uint64_t dsobj; - - /* Create and open new dataset. */ - dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, - rbsa->origin, flags, rbsa->cr, tx); - VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, - B_TRUE, dmu_recv_tag, &rbsa->ds)); +typedef struct dmu_recv_begin_arg { + const char *drba_origin; + dmu_recv_cookie_t *drba_cookie; + cred_t *drba_cred; +} dmu_recv_begin_arg_t; - if (rbsa->origin == NULL) { - (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, - rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); - } - - spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, - dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); -} - -/* ARGSUSED */ static int -recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, + uint64_t fromguid) { - dsl_dataset_t *ds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - int err; uint64_t val; + int error; + dsl_pool_t *dp = ds->ds_dir->dd_pool; /* must not have any changes since most recent snapshot */ - if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) - return (ETXTBSY); + if (!drba->drba_cookie->drc_force && + dsl_dataset_modified_since_lastsnap(ds)) + return (SET_ERROR(ETXTBSY)); + + /* temporary clone name must not exist */ + error = zap_lookup(dp->dp_meta_objset, + ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, + 8, 1, &val); + if (error != ENOENT) + return (error == 0 ? EBUSY : error); /* new snapshot name must not exist */ - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); + error = zap_lookup(dp->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, + 8, 1, &val); + if (error != ENOENT) + return (error == 0 ? EEXIST : error); - if (rbsa->fromguid) { + if (fromguid != 0) { /* if incremental, most recent snapshot must match fromguid */ if (ds->ds_prev == NULL) - return (ENODEV); + return (SET_ERROR(ENODEV)); /* * most recent snapshot must match fromguid, or there are no * changes since the fromguid one */ - if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { + if (ds->ds_prev->ds_phys->ds_guid != fromguid) { uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; while (obj != 0) { dsl_dataset_t *snap; - err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - obj, FTAG, &snap); - if (err) - return (ENODEV); + error = dsl_dataset_hold_obj(dp, obj, FTAG, + &snap); + if (error != 0) + return (SET_ERROR(ENODEV)); if (snap->ds_phys->ds_creation_txg < birth) { dsl_dataset_rele(snap, FTAG); - return (ENODEV); + return (SET_ERROR(ENODEV)); } - if (snap->ds_phys->ds_guid == rbsa->fromguid) { + if (snap->ds_phys->ds_guid == fromguid) { dsl_dataset_rele(snap, FTAG); break; /* it's ok */ } @@ -734,67 +742,161 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dataset_rele(snap, FTAG); } if (obj == 0) - return (ENODEV); + return (SET_ERROR(ENODEV)); } } else { /* if full, most recent snapshot must be $ORIGIN */ if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) - return (ENODEV); + return (SET_ERROR(ENODEV)); } - /* temporary clone name must not exist */ - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_dir->dd_phys->dd_child_dir_zapobj, - rbsa->clonelastname, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); - return (0); + +} + +static int +dmu_recv_begin_check(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t fromguid = drrb->drr_fromguid; + int flags = drrb->drr_flags; + int error; + dsl_dataset_t *ds; + const char *tofs = drba->drba_cookie->drc_tofs; + + /* already checked */ + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM || + drrb->drr_type >= DMU_OST_NUMTYPES || + ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) + return (SET_ERROR(EINVAL)); + + /* Verify pool version supports SA if SA_SPILL feature set */ + if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_SA_SPILL) && + spa_version(dp->dp_spa) < SPA_VERSION_SA) { + return (SET_ERROR(ENOTSUP)); + } + + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error == 0) { + /* target fs already exists; recv into temp clone */ + + /* Can't recv a clone into an existing fs */ + if (flags & DRR_FLAG_CLONE) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + error = recv_begin_check_existing_impl(drba, ds, fromguid); + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* target fs does not exist; must be a full backup or clone */ + char buf[MAXNAMELEN]; + + /* + * If it's a non-clone incremental, we are missing the + * target fs, so fail the recv. + */ + if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) + return (SET_ERROR(ENOENT)); + + /* Open the parent of tofs */ + ASSERT3U(strlen(tofs), <, MAXNAMELEN); + (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); + error = dsl_dataset_hold(dp, buf, FTAG, &ds); + if (error != 0) + return (error); + + if (drba->drba_origin != NULL) { + dsl_dataset_t *origin; + error = dsl_dataset_hold(dp, drba->drba_origin, + FTAG, &origin); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + if (!dsl_dataset_is_snapshot(origin)) { + dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + if (origin->ds_phys->ds_guid != fromguid) { + dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENODEV)); + } + dsl_dataset_rele(origin, FTAG); + } + dsl_dataset_rele(ds, FTAG); + error = 0; + } + return (error); } -/* ARGSUSED */ static void -recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ohds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - dsl_pool_t *dp = ohds->ds_dir->dd_pool; - dsl_dataset_t *cds; - uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + const char *tofs = drba->drba_cookie->drc_tofs; + dsl_dataset_t *ds, *newds; uint64_t dsobj; + int error; + uint64_t crflags; + + crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? + DS_FLAG_CI_DATASET : 0; - /* create and open the temporary clone */ - dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, - ohds->ds_prev, flags, rbsa->cr, tx); - VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error == 0) { + /* create temporary clone */ + dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, + ds->ds_prev, crflags, drba->drba_cred, tx); + dsl_dataset_rele(ds, FTAG); + } else { + dsl_dir_t *dd; + const char *tail; + dsl_dataset_t *origin = NULL; + + VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); + + if (drba->drba_origin != NULL) { + VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, + FTAG, &origin)); + } + + /* Create new dataset. */ + dsobj = dsl_dataset_create_sync(dd, + strrchr(tofs, '/') + 1, + origin, crflags, drba->drba_cred, tx); + if (origin != NULL) + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(dd, FTAG); + drba->drba_cookie->drc_newfs = B_TRUE; + } + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + + dmu_buf_will_dirty(newds->ds_dbuf, tx); + newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; /* * If we actually created a non-clone, we need to create the * objset in our new dataset. */ - if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { + if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { (void) dmu_objset_create_impl(dp->dp_spa, - cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); + newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } - rbsa->ds = cds; + drba->drba_cookie->drc_ds = newds; - spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, - dp->dp_spa, tx, "dataset = %lld", dsobj); -} - -static boolean_t -dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) -{ - int featureflags; - - featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); - - /* Verify pool version supports SA if SA_SPILL feature set */ - return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && - (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); + spa_history_log_internal_ds(newds, "receive", tx, ""); } /* @@ -802,132 +904,55 @@ dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) * succeeds; otherwise we will leak the holds on the datasets. */ int -dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, - boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) +dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, + boolean_t force, char *origin, dmu_recv_cookie_t *drc) { - int err = 0; - boolean_t byteswap; - struct recvbeginsyncarg rbsa = { 0 }; - uint64_t versioninfo; - int flags; - dsl_dataset_t *ds; - - if (drrb->drr_magic == DMU_BACKUP_MAGIC) - byteswap = FALSE; - else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) - byteswap = TRUE; - else - return (EINVAL); - - rbsa.tofs = tofs; - rbsa.tosnap = tosnap; - rbsa.origin = origin ? origin->os_dsl_dataset : NULL; - rbsa.fromguid = drrb->drr_fromguid; - rbsa.type = drrb->drr_type; - rbsa.tag = FTAG; - rbsa.dsflags = 0; - rbsa.cr = CRED(); - versioninfo = drrb->drr_versioninfo; - flags = drrb->drr_flags; - - if (byteswap) { - rbsa.type = BSWAP_32(rbsa.type); - rbsa.fromguid = BSWAP_64(rbsa.fromguid); - versioninfo = BSWAP_64(versioninfo); - flags = BSWAP_32(flags); - } - - if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || - rbsa.type >= DMU_OST_NUMTYPES || - ((flags & DRR_FLAG_CLONE) && origin == NULL)) - return (EINVAL); - - if (flags & DRR_FLAG_CI_DATA) - rbsa.dsflags = DS_FLAG_CI_DATASET; + dmu_recv_begin_arg_t drba = { 0 }; + dmu_replay_record_t *drr; bzero(drc, sizeof (dmu_recv_cookie_t)); drc->drc_drrb = drrb; drc->drc_tosnap = tosnap; - drc->drc_top_ds = top_ds; + drc->drc_tofs = tofs; drc->drc_force = force; - /* - * Process the begin in syncing context. - */ - - /* open the dataset we are logically receiving into */ - err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); - if (err == 0) { - if (dmu_recv_verify_features(ds, drrb)) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (ENOTSUP); - } - /* target fs already exists; recv into temp clone */ - - /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (EINVAL); - } + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + drc->drc_byteswap = B_TRUE; + else if (drrb->drr_magic != DMU_BACKUP_MAGIC) + return (SET_ERROR(EINVAL)); - /* must not have an incremental recv already in progress */ - if (!mutex_tryenter(&ds->ds_recvlock)) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (EBUSY); - } - - /* tmp clone name is: tofs/%tosnap" */ - (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), - "%%%s", tosnap); - rbsa.force = force; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_existing_check, recv_existing_sync, ds, &rbsa, 5); - if (err) { - mutex_exit(&ds->ds_recvlock); - dsl_dataset_rele(ds, dmu_recv_tag); - return (err); - } - drc->drc_logical_ds = ds; - drc->drc_real_ds = rbsa.ds; - } else if (err == ENOENT) { - /* target fs does not exist; must be a full backup or clone */ - char *cp; - - /* - * If it's a non-clone incremental, we are missing the - * target fs, so fail the recv. - */ - if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) - return (ENOENT); - - /* Open the parent of tofs */ - cp = strrchr(tofs, '/'); - *cp = '\0'; - err = dsl_dataset_hold(tofs, FTAG, &ds); - *cp = '/'; - if (err) - return (err); - - if (dmu_recv_verify_features(ds, drrb)) { - dsl_dataset_rele(ds, FTAG); - return (ENOTSUP); - } + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); + drr->drr_type = DRR_BEGIN; + drr->drr_u.drr_begin = *drc->drc_drrb; + if (drc->drc_byteswap) { + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &drc->drc_cksum); + } else { + fletcher_4_incremental_native(drr, + sizeof (dmu_replay_record_t), &drc->drc_cksum); + } + kmem_free(drr, sizeof (dmu_replay_record_t)); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); - dsl_dataset_rele(ds, FTAG); - if (err) - return (err); - drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; - drc->drc_newfs = B_TRUE; + if (drc->drc_byteswap) { + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); + drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } - return (err); + drba.drba_origin = origin; + drba.drba_cookie = drc; + drba.drba_cred = CRED(); + + return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, + &drba, 5)); } struct restorearg { int err; - int byteswap; + boolean_t byteswap; kthread_t *td; struct file *fp; char *buf; @@ -964,7 +989,8 @@ free_guid_map_onexit(void *arg) guid_map_entry_t *gmep; while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { - dsl_dataset_rele(gmep->gme_ds, ca); + dsl_dataset_long_rele(gmep->gme_ds, gmep); + dsl_dataset_rele(gmep->gme_ds, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -1013,10 +1039,10 @@ restore_read(struct restorearg *ra, int len) len - done, ra->voff, &resid); if (resid == len - done) - ra->err = EINVAL; + ra->err = SET_ERROR(EINVAL); ra->voff += len - done - resid; done = len - resid; - if (ra->err) + if (ra->err != 0) return (NULL); } @@ -1125,17 +1151,17 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) drro->drr_blksz < SPA_MINBLOCKSIZE || drro->drr_blksz > SPA_MAXBLOCKSIZE || drro->drr_bonuslen > DN_MAX_BONUSLEN) { - return (EINVAL); + return (SET_ERROR(EINVAL)); } err = dmu_object_info(os, drro->drr_object, NULL); if (err != 0 && err != ENOENT) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (drro->drr_bonuslen) { data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); - if (ra->err) + if (ra->err != 0) return (ra->err); } @@ -1144,7 +1170,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1158,14 +1184,14 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen); } - if (err) { - return (EINVAL); + if (err != 0) { + return (SET_ERROR(EINVAL)); } tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, drro->drr_object); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1202,7 +1228,7 @@ restore_freeobjects(struct restorearg *ra, objset_t *os, uint64_t obj; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) - return (EINVAL); + return (SET_ERROR(EINVAL)); for (obj = drrfo->drr_firstobj; obj < drrfo->drr_firstobj + drrfo->drr_numobjs; @@ -1213,7 +1239,7 @@ restore_freeobjects(struct restorearg *ra, objset_t *os, continue; err = dmu_free_object(os, obj); - if (err) + if (err != 0) return (err); } return (0); @@ -1229,21 +1255,21 @@ restore_write(struct restorearg *ra, objset_t *os, if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) - return (EINVAL); + return (SET_ERROR(EINVAL)); data = restore_read(ra, drrw->drr_length); if (data == NULL) return (ra->err); if (dmu_object_info(os, drrw->drr_object, NULL) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1278,7 +1304,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os, dmu_buf_t *dbp; if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * If the GUID of the referenced dataset is different from the @@ -1288,10 +1314,10 @@ restore_write_byref(struct restorearg *ra, objset_t *os, gmesrch.guid = drrwbr->drr_refguid; if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, &where)) == NULL) { - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) - return (EINVAL); + return (SET_ERROR(EINVAL)); } else { ref_os = os; } @@ -1305,7 +1331,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os, dmu_tx_hold_write(tx, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1326,14 +1352,14 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) if (drrs->drr_length < SPA_MINBLOCKSIZE || drrs->drr_length > SPA_MAXBLOCKSIZE) - return (EINVAL); + return (SET_ERROR(EINVAL)); data = restore_read(ra, drrs->drr_length); if (data == NULL) return (ra->err); if (dmu_object_info(os, drrs->drr_object, NULL) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { @@ -1346,7 +1372,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) dmu_tx_hold_spill(tx, db->db_object); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); dmu_tx_abort(tx); @@ -1375,16 +1401,26 @@ restore_free(struct restorearg *ra, objset_t *os, if (drrf->drr_length != -1ULL && drrf->drr_offset + drrf->drr_length < drrf->drr_offset) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (dmu_object_info(os, drrf->drr_object, NULL) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); err = dmu_free_long_range(os, drrf->drr_object, drrf->drr_offset, drrf->drr_length); return (err); } +/* used to destroy the drc_ds on error */ +static void +dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) +{ + char name[MAXNAMELEN]; + dsl_dataset_name(drc->drc_ds, name); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + (void) dsl_destroy_head(name); +} + /* * NB: callers *must* call dmu_recv_end() if this succeeds. */ @@ -1398,36 +1434,8 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, zio_cksum_t pcksum; int featureflags; - if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) - ra.byteswap = TRUE; - - { - /* compute checksum of drr_begin record */ - dmu_replay_record_t *drr; - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); - - drr->drr_type = DRR_BEGIN; - drr->drr_u.drr_begin = *drc->drc_drrb; - if (ra.byteswap) { - fletcher_4_incremental_byteswap(drr, - sizeof (dmu_replay_record_t), &ra.cksum); - } else { - fletcher_4_incremental_native(drr, - sizeof (dmu_replay_record_t), &ra.cksum); - } - kmem_free(drr, sizeof (dmu_replay_record_t)); - } - - if (ra.byteswap) { - struct drr_begin *drrb = drc->drc_drrb; - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); - } - + ra.byteswap = drc->drc_byteswap; + ra.cksum = drc->drc_cksum; ra.td = curthread; ra.fp = fp; ra.voff = *voffp; @@ -1435,16 +1443,16 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ - ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == + ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, DMU_SUBSTREAM); - ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); + ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); /* * Open the objset we are modifying. */ - VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); + VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); - ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); + ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); @@ -1453,11 +1461,11 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, minor_t minor; if (cleanup_fd == -1) { - ra.err = EBADF; + ra.err = SET_ERROR(EBADF); goto out; } ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (ra.err) { + if (ra.err != 0) { cleanup_fd = -1; goto out; } @@ -1471,12 +1479,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, ra.err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); - if (ra.err) + if (ra.err != 0) goto out; } else { ra.err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&ra.guid_to_ds_map); - if (ra.err) + if (ra.err != 0) goto out; } @@ -1490,7 +1498,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, while (ra.err == 0 && NULL != (drr = restore_read(&ra, sizeof (*drr)))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { - ra.err = EINTR; + ra.err = SET_ERROR(EINTR); goto out; } @@ -1544,7 +1552,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, * everything before the DRR_END record. */ if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) - ra.err = ECKSUM; + ra.err = SET_ERROR(ECKSUM); goto out; } case DRR_SPILL: @@ -1554,7 +1562,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, break; } default: - ra.err = EINVAL; + ra.err = SET_ERROR(EINVAL); goto out; } pcksum = ra.cksum; @@ -1570,14 +1578,7 @@ out: * destroy what we created, so we don't leave it in the * inconsistent restoring state. */ - txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); - - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, - B_FALSE); - if (drc->drc_real_ds != drc->drc_logical_ds) { - mutex_exit(&drc->drc_logical_ds->ds_recvlock); - dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); - } + dmu_recv_cleanup_ds(drc); } kmem_free(ra.buf, ra.bufsize); @@ -1585,141 +1586,176 @@ out: return (ra.err); } -struct recvendsyncarg { - char *tosnap; - uint64_t creation_time; - uint64_t toguid; -}; - static int -recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_end_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - struct recvendsyncarg *resa = arg2; + dmu_recv_cookie_t *drc = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + int error; + + ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); + + if (!drc->drc_newfs) { + dsl_dataset_t *origin_head; + + error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); + if (error != 0) + return (error); + error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, + origin_head, drc->drc_force); + if (error != 0) { + dsl_dataset_rele(origin_head, FTAG); + return (error); + } + error = dsl_dataset_snapshot_check_impl(origin_head, + drc->drc_tosnap, tx); + dsl_dataset_rele(origin_head, FTAG); + if (error != 0) + return (error); - return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); + error = dsl_destroy_head_check_impl(drc->drc_ds, 1); + } else { + error = dsl_dataset_snapshot_check_impl(drc->drc_ds, + drc->drc_tosnap, tx); + } + return (error); } static void -recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_end_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - struct recvendsyncarg *resa = arg2; + dmu_recv_cookie_t *drc = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + + spa_history_log_internal_ds(drc->drc_ds, "finish receiving", + tx, "snap=%s", drc->drc_tosnap); + + if (!drc->drc_newfs) { + dsl_dataset_t *origin_head; + + VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, + &origin_head)); + dsl_dataset_clone_swap_sync_impl(drc->drc_ds, + origin_head, tx); + dsl_dataset_snapshot_sync_impl(origin_head, + drc->drc_tosnap, tx); + + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); + origin_head->ds_prev->ds_phys->ds_creation_time = + drc->drc_drrb->drr_creation_time; + origin_head->ds_prev->ds_phys->ds_guid = + drc->drc_drrb->drr_toguid; + origin_head->ds_prev->ds_phys->ds_flags &= + ~DS_FLAG_INCONSISTENT; + + dmu_buf_will_dirty(origin_head->ds_dbuf, tx); + origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + + dsl_dataset_rele(origin_head, FTAG); + dsl_destroy_head_sync_impl(drc->drc_ds, tx); + } else { + dsl_dataset_t *ds = drc->drc_ds; - dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); + dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); - /* set snapshot's creation time and guid */ - dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; - ds->ds_prev->ds_phys->ds_guid = resa->toguid; - ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_creation_time = + drc->drc_drrb->drr_creation_time; + ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; + ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + } + drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; + /* + * Release the hold from dmu_recv_begin. This must be done before + * we return to open context, so that when we free the dataset's dnode, + * we can evict its bonus buffer. + */ + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + drc->drc_ds = NULL; } static int -add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) +add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; + dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; int err; ASSERT(guid_map != NULL); - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); + err = dsl_pool_hold(name, FTAG, &dp); + if (err != 0) + return (err); + gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); + err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); if (err == 0) { - gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); gmep->guid = snapds->ds_phys->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); - } + dsl_dataset_long_hold(snapds, gmep); + } else + kmem_free(gmep, sizeof (*gmep)); - rw_exit(&dp->dp_config_rwlock); + dsl_pool_rele(dp, FTAG); return (err); } +static int dmu_recv_end_modified_blocks = 3; + static int dmu_recv_existing_end(dmu_recv_cookie_t *drc) { - struct recvendsyncarg resa; - dsl_dataset_t *ds = drc->drc_logical_ds; - int err, myerr; - - if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { - err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, - drc->drc_force); - if (err) - goto out; - } else { - mutex_exit(&ds->ds_recvlock); - dsl_dataset_rele(ds, dmu_recv_tag); - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, - B_FALSE); - return (EBUSY); - } + int error; + char name[MAXNAMELEN]; - resa.creation_time = drc->drc_drrb->drr_creation_time; - resa.toguid = drc->drc_drrb->drr_toguid; - resa.tosnap = drc->drc_tosnap; +#ifdef _KERNEL + /* + * We will be destroying the ds; make sure its origin is unmounted if + * necessary. + */ + dsl_dataset_name(drc->drc_ds, name); + zfs_destroy_unmount_origin(name); +#endif - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_end_check, recv_end_sync, ds, &resa, 3); - if (err) { - /* swap back */ - (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); - } + error = dsl_sync_task(drc->drc_tofs, + dmu_recv_end_check, dmu_recv_end_sync, drc, + dmu_recv_end_modified_blocks); -out: - mutex_exit(&ds->ds_recvlock); - if (err == 0 && drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); - dsl_dataset_disown(ds, dmu_recv_tag); - myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); - ASSERT0(myerr); - return (err); + if (error != 0) + dmu_recv_cleanup_ds(drc); + return (error); } static int dmu_recv_new_end(dmu_recv_cookie_t *drc) { - struct recvendsyncarg resa; - dsl_dataset_t *ds = drc->drc_logical_ds; - int err; - - /* - * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() - * expects it to have a ds_user_ptr (and zil), but clone_swap() - * can close it. - */ - txg_wait_synced(ds->ds_dir->dd_pool, 0); + int error; - resa.creation_time = drc->drc_drrb->drr_creation_time; - resa.toguid = drc->drc_drrb->drr_toguid; - resa.tosnap = drc->drc_tosnap; + error = dsl_sync_task(drc->drc_tofs, + dmu_recv_end_check, dmu_recv_end_sync, drc, + dmu_recv_end_modified_blocks); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_end_check, recv_end_sync, ds, &resa, 3); - if (err) { - /* clean up the fs we just recv'd into */ - (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); - } else { - if (drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); - /* release the hold from dmu_recv_begin */ - dsl_dataset_disown(ds, dmu_recv_tag); + if (error != 0) { + dmu_recv_cleanup_ds(drc); + } else if (drc->drc_guid_to_ds_map != NULL) { + (void) add_ds_to_guidmap(drc->drc_tofs, + drc->drc_guid_to_ds_map, + drc->drc_newsnapobj); } - return (err); + return (error); } int dmu_recv_end(dmu_recv_cookie_t *drc) { - if (drc->drc_logical_ds != drc->drc_real_ds) - return (dmu_recv_existing_end(drc)); - else + if (drc->drc_newfs) return (dmu_recv_new_end(drc)); + else + return (dmu_recv_existing_end(drc)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c index f3d5069d4..1ff47c811 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); cbp = buf->b_data; @@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, zb->zb_level - 1, zb->zb_blkid * epb + i); err = traverse_visitbp(td, dnp, &cbp[i], &czb); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); dnp = buf->b_data; @@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, for (i = 0; i < epb; i++) { err = traverse_dnode(td, &dnp[i], zb->zb_objset, zb->zb_blkid * epb + i); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); osp = buf->b_data; @@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, for (j = 0; j < dnp->dn_nblkptr; j++) { SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -434,7 +434,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ASSERT(pfd->pd_blks_fetched >= 0); if (pfd->pd_cancel) - return (EINTR); + return (SET_ERROR(EINTR)); if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) || BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) || @@ -514,14 +514,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); /* See comment on ZIL traversal in dsl_scan_visitds. */ - if (ds != NULL && !dsl_dataset_is_snapshot(ds)) { - objset_t *os; + if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) { + uint32_t flags = ARC_WAIT; + objset_phys_t *osp; + arc_buf_t *buf; - err = dmu_objset_from_ds(ds, &os); - if (err) + err = arc_read(NULL, td.td_spa, rootbp, + arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL); + if (err != 0) return (err); - traverse_zil(&td, &os->os_zil_header); + osp = buf->b_data; + traverse_zil(&td, &osp->os_zil_header); + (void) arc_buf_remove_ref(buf, &buf); } if (!(flags & TRAVERSE_PREFETCH_DATA) || @@ -583,7 +589,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, /* visit the MOS */ err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa), txg_start, NULL, flags, func, arg); - if (err) + if (err != 0) return (err); /* visit each dataset */ @@ -592,7 +598,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, dmu_object_info_t doi; err = dmu_object_info(mos, obj, &doi); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -603,10 +609,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, dsl_dataset_t *ds; uint64_t txg = txg_start; - rw_enter(&dp->dp_config_rwlock, RW_READER); + dsl_pool_config_enter(dp, FTAG); err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (err) { + dsl_pool_config_exit(dp, FTAG); + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -616,7 +622,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, txg = ds->ds_phys->ds_prev_snap_txg; err = traverse_dataset(ds, txg, flags, func, arg); dsl_dataset_rele(ds, FTAG); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c index 6c00dd956..3eeaca64e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c @@ -48,7 +48,7 @@ dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); tx->tx_dir = dd; - if (dd) + if (dd != NULL) tx->tx_pool = dd->dd_pool; list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), offsetof(dmu_tx_hold_t, txh_node)); @@ -160,7 +160,7 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) db = dbuf_hold_level(dn, level, blkid, FTAG); rw_exit(&dn->dn_struct_rwlock); if (db == NULL) - return (EIO); + return (SET_ERROR(EIO)); err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); dbuf_rele(db, FTAG); return (err); @@ -370,7 +370,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) out: if (txh->txh_space_towrite + txh->txh_space_tooverwrite > 2 * DMU_MAX_ACCESS) - err = EFBIG; + err = SET_ERROR(EFBIG); if (err) txh->txh_tx->tx_err = err; @@ -898,7 +898,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) #endif static int -dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) +dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) { dmu_tx_hold_t *txh; spa_t *spa = tx->tx_pool->dp_spa; @@ -922,9 +922,9 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) */ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && txg_how != TXG_WAIT) - return (EIO); + return (SET_ERROR(EIO)); - return (ERESTART); + return (SET_ERROR(ERESTART)); } tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh); @@ -945,7 +945,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) if (dn->dn_assigned_txg == tx->tx_txg - 1) { mutex_exit(&dn->dn_mtx); tx->tx_needassign_txh = txh; - return (ERESTART); + return (SET_ERROR(ERESTART)); } if (dn->dn_assigned_txg == 0) dn->dn_assigned_txg = tx->tx_txg; @@ -961,13 +961,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) fudge += txh->txh_fudge; } - /* - * NB: This check must be after we've held the dnodes, so that - * the dmu_tx_unassign() logic will work properly - */ - if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) - return (ERESTART); - /* * If a snapshot has been taken since we made our estimates, * assume that we won't be able to free or overwrite anything. @@ -1048,26 +1041,25 @@ dmu_tx_unassign(dmu_tx_t *tx) * * (1) TXG_WAIT. If the current open txg is full, waits until there's * a new one. This should be used when you're not holding locks. - * If will only fail if we're truly out of space (or over quota). + * It will only fail if we're truly out of space (or over quota). * * (2) TXG_NOWAIT. If we can't assign into the current open txg without * blocking, returns immediately with ERESTART. This should be used * whenever you're holding locks. On an ERESTART error, the caller * should drop locks, do a dmu_tx_wait(tx), and try again. - * - * (3) A specific txg. Use this if you need to ensure that multiple - * transactions all sync in the same txg. Like TXG_NOWAIT, it - * returns ERESTART if it can't assign you into the requested txg. */ int -dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) +dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how) { int err; ASSERT(tx->tx_txg == 0); - ASSERT(txg_how != 0); + ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT); ASSERT(!dsl_pool_sync_context(tx->tx_pool)); + /* If we might wait, we must not hold the config lock. */ + ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool)); + while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { dmu_tx_unassign(tx); @@ -1088,6 +1080,7 @@ dmu_tx_wait(dmu_tx_t *tx) spa_t *spa = tx->tx_pool->dp_spa; ASSERT(tx->tx_txg == 0); + ASSERT(!dsl_pool_config_held(tx->tx_pool)); /* * It's possible that the pool has become active after this thread @@ -1214,6 +1207,14 @@ dmu_tx_get_txg(dmu_tx_t *tx) return (tx->tx_txg); } +dsl_pool_t * +dmu_tx_pool(dmu_tx_t *tx) +{ + ASSERT(tx->tx_pool != NULL); + return (tx->tx_pool); +} + + void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c index 0fd01010f..9aefdd961 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -74,7 +74,11 @@ dnode_cons(void *arg, void *unused, int kmflag) mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); - refcount_create(&dn->dn_holds); + /* + * Every dbuf has a reference, and dropping a tracked reference is + * O(number of references), so don't track dn_holds. + */ + refcount_create_untracked(&dn->dn_holds); refcount_create(&dn->dn_tx_holds); list_link_init(&dn->dn_link); @@ -1032,12 +1036,12 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, dn = (object == DMU_USERUSED_OBJECT) ? DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os); if (dn == NULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); type = dn->dn_type; if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) - return (ENOENT); + return (SET_ERROR(ENOENT)); if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) - return (EEXIST); + return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); (void) refcount_add(&dn->dn_holds, tag); *dnp = dn; @@ -1045,7 +1049,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, } if (object == 0 || object >= DN_MAX_OBJECT) - return (EINVAL); + return (SET_ERROR(EINVAL)); mdn = DMU_META_DNODE(os); ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT); @@ -1063,7 +1067,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, if (drop_struct_lock) rw_exit(&mdn->dn_struct_rwlock); if (db == NULL) - return (EIO); + return (SET_ERROR(EIO)); err = dbuf_read(db, NULL, DB_RF_CANFAIL); if (err) { dbuf_rele(db, FTAG); @@ -1371,7 +1375,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) fail: rw_exit(&dn->dn_struct_rwlock); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } /* read-holding callers must not rely on the lock being continuously held */ @@ -1857,7 +1861,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, * at the pointer to this block in its parent, and its * going to be unallocated, so we will skip over it. */ - return (ESRCH); + return (SET_ERROR(ESRCH)); } error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); if (error) { @@ -1873,7 +1877,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, * This can only happen when we are searching up the tree * and these conditions mean that we need to keep climbing. */ - error = ESRCH; + error = SET_ERROR(ESRCH); } else if (lvl == 0) { dnode_phys_t *dnp = data; span = DNODE_SHIFT; @@ -1886,7 +1890,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, *offset += (1ULL << span) * inc; } if (i < 0 || i == blkfill) - error = ESRCH; + error = SET_ERROR(ESRCH); } else { blkptr_t *bp = data; uint64_t start = *offset; @@ -1918,7 +1922,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, *offset = start; } if (i < 0 || i >= epb) - error = ESRCH; + error = SET_ERROR(ESRCH); } if (db) @@ -1962,7 +1966,7 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_phys->dn_nlevels == 0) { - error = ESRCH; + error = SET_ERROR(ESRCH); goto out; } @@ -1971,7 +1975,7 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, if (flags & DNODE_FIND_HOLE) *offset = dn->dn_datablksz; } else { - error = ESRCH; + error = SET_ERROR(ESRCH); } goto out; } @@ -1992,7 +1996,7 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? initial_offset < *offset : initial_offset > *offset)) - error = ESRCH; + error = SET_ERROR(ESRCH); out: if (!(flags & DNODE_FIND_HAVELOCK)) rw_exit(&dn->dn_struct_rwlock); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c index 4862dcbdf..5a63b2929 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c @@ -480,6 +480,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); + ASSERT3P(dn->dn_bonus, ==, NULL); /* * XXX - It would be nice to assert this, but we may still diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c index 9c735be54..7333d3be2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c @@ -20,10 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek . - * All rights reserved. * Portions Copyright (c) 2011 Martin Matuska */ @@ -48,12 +46,8 @@ #include #include #include - -static char *dsl_reaper = "the grim reaper"; - -static dsl_checkfunc_t dsl_dataset_destroy_begin_check; -static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; -static dsl_syncfunc_t dsl_dataset_set_reservation_sync; +#include +#include #define SWITCH64(x, y) \ { \ @@ -66,9 +60,6 @@ static dsl_syncfunc_t dsl_dataset_set_reservation_sync; #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE -#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) - - /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been @@ -255,7 +246,7 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) { dsl_dataset_t *ds = dsv; - ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); + ASSERT(ds->ds_owner == NULL); unique_remove(ds->ds_fsid_guid); @@ -263,36 +254,30 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { - dsl_dataset_drop_ref(ds->ds_prev, ds); + dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); - if (db != NULL) { + if (ds->ds_phys->ds_deadlist_obj != 0) dsl_deadlist_close(&ds->ds_deadlist); - } else { - ASSERT(ds->ds_deadlist.dl_dbuf == NULL); - ASSERT(!ds->ds_deadlist.dl_oldfmt); - } if (ds->ds_dir) - dsl_dir_close(ds->ds_dir, ds); + dsl_dir_rele(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); if (mutex_owned(&ds->ds_lock)) mutex_exit(&ds->ds_lock); mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); if (mutex_owned(&ds->ds_opening_lock)) mutex_exit(&ds->ds_opening_lock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); kmem_free(ds, sizeof (dsl_dataset_t)); } -static int +int dsl_dataset_get_snapname(dsl_dataset_t *ds) { dsl_dataset_phys_t *headphys; @@ -308,7 +293,7 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds) err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &headdbuf); - if (err) + if (err != 0) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, @@ -317,7 +302,7 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds) return (err); } -static int +int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; @@ -337,8 +322,8 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) return (err); } -static int -dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) +int +dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; @@ -358,8 +343,8 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) return (err); } -static int -dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, +int +dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; @@ -368,17 +353,16 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, int err; dmu_object_info_t doi; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); - if (err) + if (err != 0) return (err); /* Make sure dsobj has the correct object type. */ dmu_object_info_from_db(dbuf, &doi); if (doi.doi_type != DMU_OT_DSL_DATASET) - return (EINVAL); + return (SET_ERROR(EINVAL)); ds = dmu_buf_get_user(dbuf); if (ds == NULL) { @@ -390,12 +374,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, ds->ds_phys = dbuf->db_data; mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); - - rw_init(&ds->ds_rwlock, 0, 0, 0); - cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); + refcount_create(&ds->ds_longholds); bplist_create(&ds->ds_pending_deadlist); dsl_deadlist_open(&ds->ds_deadlist, @@ -405,15 +386,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, offsetof(dmu_sendarg_t, dsa_link)); if (err == 0) { - err = dsl_dir_open_obj(dp, + err = dsl_dir_hold_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); } - if (err) { + if (err != 0) { mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); @@ -423,8 +402,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; - if (ds->ds_phys->ds_prev_snap_obj) { - err = dsl_dataset_get_ref(dp, + if (ds->ds_phys->ds_prev_snap_obj != 0) { + err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev); } @@ -440,29 +419,14 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, } if (err == 0 && !dsl_dataset_is_snapshot(ds)) { - /* - * In sync context, we're called with either no lock - * or with the write lock. If we're not syncing, - * we're always called with the read lock held. - */ - boolean_t need_lock = - !RW_WRITE_HELD(&dp->dp_config_rwlock) && - dsl_pool_sync_context(dp); - - if (need_lock) - rw_enter(&dp->dp_config_rwlock, RW_READER); - - err = dsl_prop_get_ds(ds, - "refreservation", sizeof (uint64_t), 1, - &ds->ds_reserved, NULL); + err = dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + &ds->ds_reserved); if (err == 0) { - err = dsl_prop_get_ds(ds, - "refquota", sizeof (uint64_t), 1, - &ds->ds_quota, NULL); + err = dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + &ds->ds_quota); } - - if (need_lock) - rw_exit(&dp->dp_config_rwlock); } else { ds->ds_reserved = ds->ds_quota = 0; } @@ -472,15 +436,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) - dsl_dataset_drop_ref(ds->ds_prev, ds); - dsl_dir_close(ds->ds_dir, ds); + dsl_dataset_rele(ds->ds_prev, ds); + dsl_dir_rele(ds->ds_dir, ds); mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); kmem_free(ds, sizeof (dsl_dataset_t)); - if (err) { + if (err != 0) { dmu_buf_rele(dbuf, tag); return (err); } @@ -495,170 +457,118 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); - mutex_enter(&ds->ds_lock); - if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { - mutex_exit(&ds->ds_lock); - dmu_buf_rele(ds->ds_dbuf, tag); - return (ENOENT); - } - mutex_exit(&ds->ds_lock); *dsp = ds; return (0); } -static int -dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) -{ - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - /* - * In syncing context we don't want the rwlock lock: there - * may be an existing writer waiting for sync phase to - * finish. We don't need to worry about such writers, since - * sync phase is single-threaded, so the writer can't be - * doing anything while we are active. - */ - if (dsl_pool_sync_context(dp)) { - ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); - return (0); - } - - /* - * Normal users will hold the ds_rwlock as a READER until they - * are finished (i.e., call dsl_dataset_rele()). "Owners" will - * drop their READER lock after they set the ds_owner field. - * - * If the dataset is being destroyed, the destroy thread will - * obtain a WRITER lock for exclusive access after it's done its - * open-context work and then change the ds_owner to - * dsl_reaper once destruction is assured. So threads - * may block here temporarily, until the "destructability" of - * the dataset is determined. - */ - ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); - mutex_enter(&ds->ds_lock); - while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { - rw_exit(&dp->dp_config_rwlock); - cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); - if (DSL_DATASET_IS_DESTROYED(ds)) { - mutex_exit(&ds->ds_lock); - dsl_dataset_drop_ref(ds, tag); - rw_enter(&dp->dp_config_rwlock, RW_READER); - return (ENOENT); - } - /* - * The dp_config_rwlock lives above the ds_lock. And - * we need to check DSL_DATASET_IS_DESTROYED() while - * holding the ds_lock, so we have to drop and reacquire - * the ds_lock here. - */ - mutex_exit(&ds->ds_lock); - rw_enter(&dp->dp_config_rwlock, RW_READER); - mutex_enter(&ds->ds_lock); - } - mutex_exit(&ds->ds_lock); - return (0); -} - -int -dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, - dsl_dataset_t **dsp) -{ - int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); - - if (err) - return (err); - return (dsl_dataset_hold_ref(*dsp, tag)); -} - int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, +dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dataset_t **dsp) -{ - int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); - if (err) - return (err); - if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { - dsl_dataset_rele(*dsp, tag); - *dsp = NULL; - return (EBUSY); - } - return (0); -} - -int -dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; - dsl_pool_t *dp; const char *snapname; uint64_t obj; int err = 0; - err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); - if (err) + err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); + if (err != 0) return (err); - dp = dd->dd_pool; + ASSERT(dsl_pool_config_held(dp)); obj = dd->dd_phys->dd_head_dataset_obj; - rw_enter(&dp->dp_config_rwlock, RW_READER); - if (obj) - err = dsl_dataset_get_ref(dp, obj, tag, dsp); + if (obj != 0) + err = dsl_dataset_hold_obj(dp, obj, tag, dsp); else - err = ENOENT; - if (err) - goto out; - - err = dsl_dataset_hold_ref(*dsp, tag); + err = SET_ERROR(ENOENT); /* we may be looking for a snapshot */ if (err == 0 && snapname != NULL) { - dsl_dataset_t *ds = NULL; + dsl_dataset_t *ds; if (*snapname++ != '@') { dsl_dataset_rele(*dsp, tag); - err = ENOENT; - goto out; + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(ENOENT)); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); if (err == 0) - err = dsl_dataset_get_ref(dp, obj, tag, &ds); + err = dsl_dataset_hold_obj(dp, obj, tag, &ds); dsl_dataset_rele(*dsp, tag); - ASSERT3U((err == 0), ==, (ds != NULL)); - - if (ds) { + if (err == 0) { mutex_enter(&ds->ds_lock); if (ds->ds_snapname[0] == 0) (void) strlcpy(ds->ds_snapname, snapname, sizeof (ds->ds_snapname)); mutex_exit(&ds->ds_lock); - err = dsl_dataset_hold_ref(ds, tag); - *dsp = err ? NULL : ds; + *dsp = ds; } } -out: - rw_exit(&dp->dp_config_rwlock); - dsl_dir_close(dd, FTAG); + + dsl_dir_rele(dd, FTAG); return (err); } int -dsl_dataset_own(const char *name, boolean_t inconsistentok, +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, + void *tag, dsl_dataset_t **dsp) +{ + int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + if (err != 0) + return (err); + if (!dsl_dataset_tryown(*dsp, tag)) { + dsl_dataset_rele(*dsp, tag); + *dsp = NULL; + return (SET_ERROR(EBUSY)); + } + return (0); +} + +int +dsl_dataset_own(dsl_pool_t *dp, const char *name, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(name, tag, dsp); - if (err) + int err = dsl_dataset_hold(dp, name, tag, dsp); + if (err != 0) return (err); - if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + if (!dsl_dataset_tryown(*dsp, tag)) { dsl_dataset_rele(*dsp, tag); - return (EBUSY); + return (SET_ERROR(EBUSY)); } return (0); } +/* + * See the comment above dsl_pool_hold() for details. In summary, a long + * hold is used to prevent destruction of a dataset while the pool hold + * is dropped, allowing other concurrent operations (e.g. spa_sync()). + * + * The dataset and pool must be held when this function is called. After it + * is called, the pool hold may be released while the dataset is still held + * and accessed. + */ +void +dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) +{ + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); + (void) refcount_add(&ds->ds_longholds, tag); +} + +void +dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) +{ + (void) refcount_remove(&ds->ds_longholds, tag); +} + +/* Return B_TRUE if there are any long holds on this dataset. */ +boolean_t +dsl_dataset_long_held(dsl_dataset_t *ds) +{ + return (!refcount_is_zero(&ds->ds_longholds)); +} + void dsl_dataset_name(dsl_dataset_t *ds, char *name) { @@ -666,7 +576,7 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name) (void) strcpy(name, "mos"); } else { dsl_dir_name(ds->ds_dir, name); - VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { (void) strcat(name, "@"); /* @@ -693,7 +603,7 @@ dsl_dataset_namelen(dsl_dataset_t *ds) result = 3; /* "mos" */ } else { result = dsl_dir_namelen(ds->ds_dir); - VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { ++result; /* adding one for the @-sign */ if (!MUTEX_HELD(&ds->ds_lock)) { @@ -709,65 +619,42 @@ dsl_dataset_namelen(dsl_dataset_t *ds) return (result); } -void -dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) -{ - dmu_buf_rele(ds->ds_dbuf, tag); -} - void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { - if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { - rw_exit(&ds->ds_rwlock); - } - dsl_dataset_drop_ref(ds, tag); + dmu_buf_rele(ds->ds_dbuf, tag); } void dsl_dataset_disown(dsl_dataset_t *ds, void *tag) { - ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || - (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); + ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); mutex_enter(&ds->ds_lock); ds->ds_owner = NULL; - if (RW_WRITE_HELD(&ds->ds_rwlock)) { - rw_exit(&ds->ds_rwlock); - cv_broadcast(&ds->ds_exclusive_cv); - } mutex_exit(&ds->ds_lock); - if (ds->ds_dbuf) - dsl_dataset_drop_ref(ds, tag); + dsl_dataset_long_rele(ds, tag); + if (ds->ds_dbuf != NULL) + dsl_dataset_rele(ds, tag); else dsl_dataset_evict(NULL, ds); } boolean_t -dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) +dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) { boolean_t gotit = FALSE; mutex_enter(&ds->ds_lock); - if (ds->ds_owner == NULL && - (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { + if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { ds->ds_owner = tag; - if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) - rw_exit(&ds->ds_rwlock); + dsl_dataset_long_hold(ds, tag); gotit = TRUE; } mutex_exit(&ds->ds_lock); return (gotit); } -void -dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) -{ - ASSERT3P(owner, ==, ds->ds_owner); - if (!RW_WRITE_HELD(&ds->ds_rwlock)) - rw_enter(&ds->ds_rwlock, RW_WRITER); -} - uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx) @@ -788,7 +675,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); - VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); @@ -808,7 +695,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, if (origin == NULL) { dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); } else { - dsl_dataset_t *ohds; + dsl_dataset_t *ohds; /* head of the origin snapshot */ dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = @@ -825,7 +712,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); @@ -837,9 +724,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY(0 == zap_add_int(mos, - origin->ds_phys->ds_next_clones_obj, - dsobj, tx)); + VERIFY0(zap_add_int(mos, + origin->ds_phys->ds_next_clones_obj, dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); @@ -851,7 +737,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(mos, + VERIFY0(zap_add_int(mos, origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); } } @@ -867,6 +753,16 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, return (dsobj); } +static void +dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + objset_t *os; + + VERIFY0(dmu_objset_from_ds(ds, &os)); + bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + dsl_dataset_dirty(ds, tx); +} + uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) @@ -875,29 +771,28 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, uint64_t dsobj, ddobj; dsl_dir_t *dd; + ASSERT(dmu_tx_is_syncing(tx)); ASSERT(lastname[0] != '@'); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); - VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); + VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); - dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); + dsobj = dsl_dataset_create_sync_dd(dd, origin, + flags & ~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); /* * If we are creating a clone, make sure we zero out any stale * data from the origin snapshots zil header. */ - if (origin != NULL) { + if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { dsl_dataset_t *ds; - objset_t *os; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); - bzero(&os->os_zil_header, sizeof (os->os_zil_header)); - dsl_dataset_dirty(ds, tx); + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + dsl_dataset_zero_zil(ds, tx); dsl_dataset_rele(ds, FTAG); } @@ -926,7 +821,7 @@ dsl_check_snap_cb(const char *name, void *arg) } int -dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, +dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, nvlist_t *snaps) { struct destroyarg *da; @@ -945,1090 +840,120 @@ dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, #endif /* __FreeBSD__ */ /* - * The snapshots must all be in the same pool. + * The unique space in the head dataset can be calculated by subtracting + * the space used in the most recent snapshot, that is still being used + * in this file system, from the space currently in use. To figure out + * the space in the most recent snapshot still in use, we need to take + * the total space used in the snapshot and subtract out the space that + * has been freed up since the snapshot was taken. */ -int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) +void +dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { - int err; - dsl_sync_task_t *dst; - spa_t *spa; - nvpair_t *pair; - dsl_sync_task_group_t *dstg; - - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) - return (0); - - err = spa_open(nvpair_name(pair), &spa, FTAG); - if (err) - return (err); - dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - dsl_dataset_t *ds; - - err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); - if (err == 0) { - struct dsl_ds_destroyarg *dsda; - - dsl_dataset_make_exclusive(ds, dstg); - dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), - KM_SLEEP); - dsda->ds = ds; - dsda->defer = defer; - dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, dsda, dstg, 0); - } else if (err == ENOENT) { - err = 0; - } else { - (void) strcpy(failed, nvpair_name(pair)); - break; - } - } + uint64_t mrs_used; + uint64_t dlused, dlcomp, dluncomp; - if (err == 0) - err = dsl_sync_task_group_wait(dstg); + ASSERT(!dsl_dataset_is_snapshot(ds)); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - struct dsl_ds_destroyarg *dsda = dst->dst_arg1; - dsl_dataset_t *ds = dsda->ds; + if (ds->ds_phys->ds_prev_snap_obj != 0) + mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; + else + mrs_used = 0; - /* - * Return the file system name that triggered the error - */ - if (dst->dst_err) { - dsl_dataset_name(ds, failed); - } - ASSERT3P(dsda->rm_origin, ==, NULL); - dsl_dataset_disown(ds, dstg); - kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); - } + dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - dsl_sync_task_group_destroy(dstg); - spa_close(spa, FTAG); - return (err); + ASSERT3U(dlused, <=, mrs_used); + ds->ds_phys->ds_unique_bytes = + ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); + if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= + SPA_VERSION_UNIQUE_ACCURATE) + ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } -static boolean_t -dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) +void +dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, + dmu_tx_t *tx) { - boolean_t might_destroy = B_FALSE; - - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && - DS_IS_DEFER_DESTROY(ds)) - might_destroy = B_TRUE; - mutex_exit(&ds->ds_lock); + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + uint64_t count; + int err; - return (might_destroy); + ASSERT(ds->ds_phys->ds_num_children >= 2); + err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); + /* + * The err should not be ENOENT, but a bug in a previous version + * of the code could cause upgrade_clones_cb() to not set + * ds_next_snap_obj when it should, leading to a missing entry. + * If we knew that the pool was created after + * SPA_VERSION_NEXT_CLONES, we could assert that it isn't + * ENOENT. However, at least we can check that we don't have + * too many entries in the next_clones_obj even after failing to + * remove this one. + */ + if (err != ENOENT) + VERIFY0(err); + ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, + &count)); + ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); } -/* - * If we're removing a clone, and these three conditions are true: - * 1) the clone's origin has no other children - * 2) the clone's origin has no user references - * 3) the clone's origin has been marked for deferred destruction - * Then, prepare to remove the origin as part of this sync task group. - */ -static int -dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) -{ - dsl_dataset_t *ds = dsda->ds; - dsl_dataset_t *origin = ds->ds_prev; - if (dsl_dataset_might_destroy_origin(origin)) { - char *name; - int namelen; - int error; +blkptr_t * +dsl_dataset_get_blkptr(dsl_dataset_t *ds) +{ + return (&ds->ds_phys->ds_bp); +} - namelen = dsl_dataset_namelen(origin) + 1; - name = kmem_alloc(namelen, KM_SLEEP); - dsl_dataset_name(origin, name); -#ifdef _KERNEL - error = zfs_unmount_snap(name, NULL); - if (error) { - kmem_free(name, namelen); - return (error); - } -#endif - error = dsl_dataset_own(name, B_TRUE, tag, &origin); - kmem_free(name, namelen); - if (error) - return (error); - dsda->rm_origin = origin; - dsl_dataset_make_exclusive(origin, tag); +void +dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + /* If it's the meta-objset, set dp_meta_rootbp */ + if (ds == NULL) { + tx->tx_pool->dp_meta_rootbp = *bp; + } else { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_bp = *bp; } +} - return (0); +spa_t * +dsl_dataset_get_spa(dsl_dataset_t *ds) +{ + return (ds->ds_dir->dd_pool->dp_spa); } -/* - * ds must be opened as OWNER. On return (whether successful or not), - * ds will be closed and caller can no longer dereference it. - */ -int -dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) +void +dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) { - int err; - dsl_sync_task_group_t *dstg; - objset_t *os; - dsl_dir_t *dd; - uint64_t obj; - struct dsl_ds_destroyarg dsda = { 0 }; - dsl_dataset_t dummy_ds = { 0 }; + dsl_pool_t *dp; - dsda.ds = ds; + if (ds == NULL) /* this is the meta-objset */ + return; - if (dsl_dataset_is_snapshot(ds)) { - /* Destroying a snapshot is simpler */ - dsl_dataset_make_exclusive(ds, tag); - - dsda.defer = defer; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_destroy_check, dsl_dataset_destroy_sync, - &dsda, tag, 0); - ASSERT3P(dsda.rm_origin, ==, NULL); - goto out; - } else if (defer) { - err = EINVAL; - goto out; - } + ASSERT(ds->ds_objset != NULL); - dd = ds->ds_dir; - dummy_ds.ds_dir = dd; - dummy_ds.ds_object = ds->ds_object; + if (ds->ds_phys->ds_next_snap_obj != 0) + panic("dirtying snapshot!"); - if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - /* - * Check for errors and mark this ds as inconsistent, in - * case we crash while freeing the objects. - */ - err = dsl_sync_task_do(dd->dd_pool, - dsl_dataset_destroy_begin_check, - dsl_dataset_destroy_begin_sync, ds, NULL, 0); - if (err) - goto out; + dp = ds->ds_dir->dd_pool; - err = dmu_objset_from_ds(ds, &os); - if (err) - goto out; + if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(ds->ds_dbuf, ds); + } +} - /* - * Remove all objects while in the open context so that - * there is less work to do in the syncing context. - */ - for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, - ds->ds_phys->ds_prev_snap_txg)) { - /* - * Ignore errors, if there is not enough disk space - * we will deal with it in dsl_dataset_destroy_sync(). - */ - (void) dmu_free_object(os, obj); - } - if (err != ESRCH) - goto out; - - /* - * Sync out all in-flight IO. - */ - txg_wait_synced(dd->dd_pool, 0); - - /* - * If we managed to free all the objects in open - * context, the user space accounting should be zero. - */ - if (ds->ds_phys->ds_bp.blk_fill == 0 && - dmu_objset_userused_enabled(os)) { - uint64_t count; - - ASSERT(zap_count(os, DMU_USERUSED_OBJECT, - &count) != 0 || count == 0); - ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, - &count) != 0 || count == 0); - } - } - - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); - rw_exit(&dd->dd_pool->dp_config_rwlock); - - if (err) - goto out; - - /* - * Blow away the dsl_dir + head dataset. - */ - dsl_dataset_make_exclusive(ds, tag); - /* - * If we're removing a clone, we might also need to remove its - * origin. - */ - do { - dsda.need_prep = B_FALSE; - if (dsl_dir_is_clone(dd)) { - err = dsl_dataset_origin_rm_prep(&dsda, tag); - if (err) { - dsl_dir_close(dd, FTAG); - goto out; - } - } - - dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); - dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, &dsda, tag, 0); - dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, &dummy_ds, FTAG, 0); - err = dsl_sync_task_group_wait(dstg); - dsl_sync_task_group_destroy(dstg); - - /* - * We could be racing against 'zfs release' or 'zfs destroy -d' - * on the origin snap, in which case we can get EBUSY if we - * needed to destroy the origin snap but were not ready to - * do so. - */ - if (dsda.need_prep) { - ASSERT(err == EBUSY); - ASSERT(dsl_dir_is_clone(dd)); - ASSERT(dsda.rm_origin == NULL); - } - } while (dsda.need_prep); - - if (dsda.rm_origin != NULL) - dsl_dataset_disown(dsda.rm_origin, tag); - - /* if it is successful, dsl_dir_destroy_sync will close the dd */ - if (err) - dsl_dir_close(dd, FTAG); -out: - dsl_dataset_disown(ds, tag); - return (err); -} - -blkptr_t * -dsl_dataset_get_blkptr(dsl_dataset_t *ds) -{ - return (&ds->ds_phys->ds_bp); -} - -void -dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) -{ - ASSERT(dmu_tx_is_syncing(tx)); - /* If it's the meta-objset, set dp_meta_rootbp */ - if (ds == NULL) { - tx->tx_pool->dp_meta_rootbp = *bp; - } else { - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_bp = *bp; - } -} - -spa_t * -dsl_dataset_get_spa(dsl_dataset_t *ds) -{ - return (ds->ds_dir->dd_pool->dp_spa); -} - -void -dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - dsl_pool_t *dp; - - if (ds == NULL) /* this is the meta-objset */ - return; - - ASSERT(ds->ds_objset != NULL); - - if (ds->ds_phys->ds_next_snap_obj != 0) - panic("dirtying snapshot!"); - - dp = ds->ds_dir->dd_pool; - - if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { - /* up the hold count until we can be written out */ - dmu_buf_add_ref(ds->ds_dbuf, ds); - } -} - -boolean_t -dsl_dataset_is_dirty(dsl_dataset_t *ds) -{ - for (int t = 0; t < TXG_SIZE; t++) { - if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, - ds, t)) - return (B_TRUE); - } - return (B_FALSE); -} - -/* - * The unique space in the head dataset can be calculated by subtracting - * the space used in the most recent snapshot, that is still being used - * in this file system, from the space currently in use. To figure out - * the space in the most recent snapshot still in use, we need to take - * the total space used in the snapshot and subtract out the space that - * has been freed up since the snapshot was taken. - */ -static void -dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) -{ - uint64_t mrs_used; - uint64_t dlused, dlcomp, dluncomp; - - ASSERT(!dsl_dataset_is_snapshot(ds)); - - if (ds->ds_phys->ds_prev_snap_obj != 0) - mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; - else - mrs_used = 0; - - dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - - ASSERT3U(dlused, <=, mrs_used); - ds->ds_phys->ds_unique_bytes = - ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); - - if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= - SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; -} - -struct killarg { - dsl_dataset_t *ds; - dmu_tx_t *tx; -}; - -/* ARGSUSED */ -static int -kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) -{ - struct killarg *ka = arg; - dmu_tx_t *tx = ka->tx; - - if (bp == NULL) - return (0); - - if (zb->zb_level == ZB_ZIL_LEVEL) { - ASSERT(zilog != NULL); - /* - * It's a block in the intent log. It has no - * accounting, so just free it. - */ - dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); - } else { - ASSERT(zilog == NULL); - ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); - (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); - } - - return (0); -} - -/* ARGSUSED */ -static int -dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t count; - int err; - - /* - * Can't delete a head dataset if there are snapshots of it. - * (Except if the only snapshots are from the branch we cloned - * from.) - */ - if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) - return (EBUSY); - - /* - * This is really a dsl_dir thing, but check it here so that - * we'll be less likely to leave this dataset inconsistent & - * nearly destroyed. - */ - err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); - if (err) - return (err); - if (count != 0) - return (EEXIST); - - return (0); -} - -/* ARGSUSED */ -static void -dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - /* Mark it as inconsistent on-disk, in case we crash */ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - - spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); -} - -static int -dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, - dmu_tx_t *tx) -{ - dsl_dataset_t *ds = dsda->ds; - dsl_dataset_t *ds_prev = ds->ds_prev; - - if (dsl_dataset_might_destroy_origin(ds_prev)) { - struct dsl_ds_destroyarg ndsda = {0}; - - /* - * If we're not prepared to remove the origin, don't remove - * the clone either. - */ - if (dsda->rm_origin == NULL) { - dsda->need_prep = B_TRUE; - return (EBUSY); - } - - ndsda.ds = ds_prev; - ndsda.is_origin_rm = B_TRUE; - return (dsl_dataset_destroy_check(&ndsda, tag, tx)); - } - - /* - * If we're not going to remove the origin after all, - * undo the open context setup. - */ - if (dsda->rm_origin != NULL) { - dsl_dataset_disown(dsda->rm_origin, tag); - dsda->rm_origin = NULL; - } - - return (0); -} - -/* - * If you add new checks here, you may need to add - * additional checks to the "temporary" case in - * snapshot_check() in dmu_objset.c. - */ -/* ARGSUSED */ -int -dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - struct dsl_ds_destroyarg *dsda = arg1; - dsl_dataset_t *ds = dsda->ds; - - /* we have an owner hold, so noone else can destroy us */ - ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); - - /* - * Only allow deferred destroy on pools that support it. - * NOTE: deferred destroy is only supported on snapshots. - */ - if (dsda->defer) { - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < - SPA_VERSION_USERREFS) - return (ENOTSUP); - ASSERT(dsl_dataset_is_snapshot(ds)); - return (0); - } - - /* - * Can't delete a head dataset if there are snapshots of it. - * (Except if the only snapshots are from the branch we cloned - * from.) - */ - if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) - return (EBUSY); - - /* - * If we made changes this txg, traverse_dsl_dataset won't find - * them. Try again. - */ - if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) - return (EAGAIN); - - if (dsl_dataset_is_snapshot(ds)) { - /* - * If this snapshot has an elevated user reference count, - * we can't destroy it yet. - */ - if (ds->ds_userrefs > 0 && !dsda->releasing) - return (EBUSY); - - mutex_enter(&ds->ds_lock); - /* - * Can't delete a branch point. However, if we're destroying - * a clone and removing its origin due to it having a user - * hold count of 0 and having been marked for deferred destroy, - * it's OK for the origin to have a single clone. - */ - if (ds->ds_phys->ds_num_children > - (dsda->is_origin_rm ? 2 : 1)) { - mutex_exit(&ds->ds_lock); - return (EEXIST); - } - mutex_exit(&ds->ds_lock); - } else if (dsl_dir_is_clone(ds->ds_dir)) { - return (dsl_dataset_origin_check(dsda, arg2, tx)); - } - - /* XXX we should do some i/o error checking... */ - return (0); -} - -struct refsarg { - kmutex_t lock; - boolean_t gone; - kcondvar_t cv; -}; - -/* ARGSUSED */ -static void -dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) -{ - struct refsarg *arg = argv; - - mutex_enter(&arg->lock); - arg->gone = TRUE; - cv_signal(&arg->cv); - mutex_exit(&arg->lock); -} - -static void -dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) -{ - struct refsarg arg; - - bzero(&arg, sizeof(arg)); - mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); - arg.gone = FALSE; - (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, - dsl_dataset_refs_gone); - dmu_buf_rele(ds->ds_dbuf, tag); - mutex_enter(&arg.lock); - while (!arg.gone) - cv_wait(&arg.cv, &arg.lock); - ASSERT(arg.gone); - mutex_exit(&arg.lock); - ds->ds_dbuf = NULL; - ds->ds_phys = NULL; - mutex_destroy(&arg.lock); - cv_destroy(&arg.cv); -} - -static void -remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) -{ - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t count; - int err; - - ASSERT(ds->ds_phys->ds_num_children >= 2); - err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); - /* - * The err should not be ENOENT, but a bug in a previous version - * of the code could cause upgrade_clones_cb() to not set - * ds_next_snap_obj when it should, leading to a missing entry. - * If we knew that the pool was created after - * SPA_VERSION_NEXT_CLONES, we could assert that it isn't - * ENOENT. However, at least we can check that we don't have - * too many entries in the next_clones_obj even after failing to - * remove this one. - */ - if (err != ENOENT) { - VERIFY0(err); - } - ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, - &count)); - ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); -} - -static void -dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) -{ - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; - - /* - * If it is the old version, dd_clones doesn't exist so we can't - * find the clones, but deadlist_remove_key() is a no-op so it - * doesn't matter. - */ - if (ds->ds_dir->dd_phys->dd_clones == 0) - return; - - for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - dsl_dataset_t *clone; - - VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone)); - if (clone->ds_dir->dd_origin_txg > mintxg) { - dsl_deadlist_remove_key(&clone->ds_deadlist, - mintxg, tx); - dsl_dataset_remove_clones_key(clone, mintxg, tx); - } - dsl_dataset_rele(clone, FTAG); - } - zap_cursor_fini(&zc); -} - -struct process_old_arg { - dsl_dataset_t *ds; - dsl_dataset_t *ds_prev; - boolean_t after_branch_point; - zio_t *pio; - uint64_t used, comp, uncomp; -}; - -static int -process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - struct process_old_arg *poa = arg; - dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; - - if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { - dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); - if (poa->ds_prev && !poa->after_branch_point && - bp->blk_birth > - poa->ds_prev->ds_phys->ds_prev_snap_txg) { - poa->ds_prev->ds_phys->ds_unique_bytes += - bp_get_dsize_sync(dp->dp_spa, bp); - } - } else { - poa->used += bp_get_dsize_sync(dp->dp_spa, bp); - poa->comp += BP_GET_PSIZE(bp); - poa->uncomp += BP_GET_UCSIZE(bp); - dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); - } - return (0); -} - -static void -process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, - dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) -{ - struct process_old_arg poa = { 0 }; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - - ASSERT(ds->ds_deadlist.dl_oldfmt); - ASSERT(ds_next->ds_deadlist.dl_oldfmt); - - poa.ds = ds; - poa.ds_prev = ds_prev; - poa.after_branch_point = after_branch_point; - poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); - VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, - process_old_cb, &poa, tx)); - VERIFY0(zio_wait(poa.pio)); - ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); - - /* change snapused */ - dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, - -poa.used, -poa.comp, -poa.uncomp, tx); - - /* swap next's deadlist to our deadlist */ - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_close(&ds_next->ds_deadlist); - SWITCH64(ds_next->ds_phys->ds_deadlist_obj, - ds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&ds_next->ds_deadlist, mos, - ds_next->ds_phys->ds_deadlist_obj); -} - -static int -old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - int err; - struct killarg ka; - - /* - * Free everything that we point to (that's born after - * the previous snapshot, if we are a clone) - * - * NB: this should be very quick, because we already - * freed all the objects in open context. - */ - ka.ds = ds; - ka.tx = tx; - err = traverse_dataset(ds, - ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka); - ASSERT0(err); - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); - - return (err); -} - -void -dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) +boolean_t +dsl_dataset_is_dirty(dsl_dataset_t *ds) { - struct dsl_ds_destroyarg *dsda = arg1; - dsl_dataset_t *ds = dsda->ds; - int err; - int after_branch_point = FALSE; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - dsl_dataset_t *ds_prev = NULL; - boolean_t wont_destroy; - uint64_t obj; - - wont_destroy = (dsda->defer && - (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); - - ASSERT(ds->ds_owner || wont_destroy); - ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); - ASSERT(ds->ds_prev == NULL || - ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); - ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); - - if (wont_destroy) { - ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; - return; - } - - /* signal any waiters that this dataset is going away */ - mutex_enter(&ds->ds_lock); - ds->ds_owner = dsl_reaper; - cv_broadcast(&ds->ds_exclusive_cv); - mutex_exit(&ds->ds_lock); - - /* Remove our reservation */ - if (ds->ds_reserved != 0) { - dsl_prop_setarg_t psa; - uint64_t value = 0; - - dsl_prop_setarg_init_uint64(&psa, "refreservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dataset_set_reservation_sync(ds, &psa, tx); - ASSERT0(ds->ds_reserved); - } - - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); - - dsl_scan_ds_destroyed(ds, tx); - - obj = ds->ds_object; - - if (ds->ds_phys->ds_prev_snap_obj != 0) { - if (ds->ds_prev) { - ds_prev = ds->ds_prev; - } else { - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); - } - after_branch_point = - (ds_prev->ds_phys->ds_next_snap_obj != obj); - - dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); - if (after_branch_point && - ds_prev->ds_phys->ds_next_clones_obj != 0) { - remove_from_next_clones(ds_prev, obj, tx); - if (ds->ds_phys->ds_next_snap_obj != 0) { - VERIFY(0 == zap_add_int(mos, - ds_prev->ds_phys->ds_next_clones_obj, - ds->ds_phys->ds_next_snap_obj, tx)); - } - } - if (after_branch_point && - ds->ds_phys->ds_next_snap_obj == 0) { - /* This clone is toast. */ - ASSERT(ds_prev->ds_phys->ds_num_children > 1); - ds_prev->ds_phys->ds_num_children--; - - /* - * If the clone's origin has no other clones, no - * user holds, and has been marked for deferred - * deletion, then we should have done the necessary - * destroy setup for it. - */ - if (ds_prev->ds_phys->ds_num_children == 1 && - ds_prev->ds_userrefs == 0 && - DS_IS_DEFER_DESTROY(ds_prev)) { - ASSERT3P(dsda->rm_origin, !=, NULL); - } else { - ASSERT3P(dsda->rm_origin, ==, NULL); - } - } else if (!after_branch_point) { - ds_prev->ds_phys->ds_next_snap_obj = - ds->ds_phys->ds_next_snap_obj; - } - } - - if (dsl_dataset_is_snapshot(ds)) { - dsl_dataset_t *ds_next; - uint64_t old_unique; - uint64_t used = 0, comp = 0, uncomp = 0; - - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); - ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); - - old_unique = ds_next->ds_phys->ds_unique_bytes; - - dmu_buf_will_dirty(ds_next->ds_dbuf, tx); - ds_next->ds_phys->ds_prev_snap_obj = - ds->ds_phys->ds_prev_snap_obj; - ds_next->ds_phys->ds_prev_snap_txg = - ds->ds_phys->ds_prev_snap_txg; - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); - - - if (ds_next->ds_deadlist.dl_oldfmt) { - process_old_deadlist(ds, ds_prev, ds_next, - after_branch_point, tx); - } else { - /* Adjust prev's unique space. */ - if (ds_prev && !after_branch_point) { - dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds_prev->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_prev_snap_txg, - &used, &comp, &uncomp); - ds_prev->ds_phys->ds_unique_bytes += used; - } - - /* Adjust snapused. */ - dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &used, &comp, &uncomp); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, - -used, -comp, -uncomp, tx); - - /* Move blocks to be freed to pool's free list. */ - dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, - &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, - tx); - dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, - DD_USED_HEAD, used, comp, uncomp, tx); - - /* Merge our deadlist into next's and free it. */ - dsl_deadlist_merge(&ds_next->ds_deadlist, - ds->ds_phys->ds_deadlist_obj, tx); - } - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); - - /* Collapse range in clone heads */ - dsl_dataset_remove_clones_key(ds, - ds->ds_phys->ds_creation_txg, tx); - - if (dsl_dataset_is_snapshot(ds_next)) { - dsl_dataset_t *ds_nextnext; - - /* - * Update next's unique to include blocks which - * were previously shared by only this snapshot - * and it. Those blocks will be born after the - * prev snap and before this snap, and will have - * died after the next snap and before the one - * after that (ie. be on the snap after next's - * deadlist). - */ - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds_next->ds_phys->ds_next_snap_obj, - FTAG, &ds_nextnext)); - dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_creation_txg, - &used, &comp, &uncomp); - ds_next->ds_phys->ds_unique_bytes += used; - dsl_dataset_rele(ds_nextnext, FTAG); - ASSERT3P(ds_next->ds_prev, ==, NULL); - - /* Collapse range in this head. */ - dsl_dataset_t *hds; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, - FTAG, &hds)); - dsl_deadlist_remove_key(&hds->ds_deadlist, - ds->ds_phys->ds_creation_txg, tx); - dsl_dataset_rele(hds, FTAG); - - } else { - ASSERT3P(ds_next->ds_prev, ==, ds); - dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); - ds_next->ds_prev = NULL; - if (ds_prev) { - VERIFY(0 == dsl_dataset_get_ref(dp, - ds->ds_phys->ds_prev_snap_obj, - ds_next, &ds_next->ds_prev)); - } - - dsl_dataset_recalc_head_uniq(ds_next); - - /* - * Reduce the amount of our unconsmed refreservation - * being charged to our parent by the amount of - * new unique data we have gained. - */ - if (old_unique < ds_next->ds_reserved) { - int64_t mrsdelta; - uint64_t new_unique = - ds_next->ds_phys->ds_unique_bytes; - - ASSERT(old_unique <= new_unique); - mrsdelta = MIN(new_unique - old_unique, - ds_next->ds_reserved - old_unique); - dsl_dir_diduse_space(ds->ds_dir, - DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); - } - } - dsl_dataset_rele(ds_next, FTAG); - } else { - zfeature_info_t *async_destroy = - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; - objset_t *os; - - /* - * There's no next snapshot, so this is a head dataset. - * Destroy the deadlist. Unless it's a clone, the - * deadlist should be empty. (If it's a clone, it's - * safe to ignore the deadlist contents.) - */ - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); - ds->ds_phys->ds_deadlist_obj = 0; - - VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); - - if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { - err = old_synchronous_dataset_destroy(ds, tx); - } else { - /* - * Move the bptree into the pool's list of trees to - * clean up and update space accounting information. - */ - uint64_t used, comp, uncomp; - - zil_destroy_sync(dmu_objset_zil(os), tx); - - if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { - spa_feature_incr(dp->dp_spa, async_destroy, tx); - dp->dp_bptree_obj = bptree_alloc(mos, tx); - VERIFY(zap_add(mos, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, - &dp->dp_bptree_obj, tx) == 0); - } - - used = ds->ds_dir->dd_phys->dd_used_bytes; - comp = ds->ds_dir->dd_phys->dd_compressed_bytes; - uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; - - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || - ds->ds_phys->ds_unique_bytes == used); - - bptree_add(mos, dp->dp_bptree_obj, - &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, - used, comp, uncomp, tx); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, - -used, -comp, -uncomp, tx); - dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, - used, comp, uncomp, tx); - } - - if (ds->ds_prev != NULL) { - if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { - VERIFY3U(0, ==, zap_remove_int(mos, - ds->ds_prev->ds_dir->dd_phys->dd_clones, - ds->ds_object, tx)); - } - dsl_dataset_rele(ds->ds_prev, ds); - ds->ds_prev = ds_prev = NULL; - } - } - - /* - * This must be done after the dsl_traverse(), because it will - * re-open the objset. - */ - if (ds->ds_objset) { - dmu_objset_evict(ds->ds_objset); - ds->ds_objset = NULL; - } - - if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { - /* Erase the link in the dir */ - dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); - ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; - ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); - err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); - ASSERT(err == 0); - } else { - /* remove from snapshot namespace */ - dsl_dataset_t *ds_head; - ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); - VERIFY(0 == dsl_dataset_get_snapname(ds)); -#ifdef ZFS_DEBUG - { - uint64_t val; - - err = dsl_dataset_snap_lookup(ds_head, - ds->ds_snapname, &val); - ASSERT0(err); - ASSERT3U(val, ==, obj); - } -#endif - err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); - ASSERT(err == 0); - dsl_dataset_rele(ds_head, FTAG); - } - - if (ds_prev && ds->ds_prev != ds_prev) - dsl_dataset_rele(ds_prev, FTAG); - - spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); - - if (ds->ds_phys->ds_next_clones_obj != 0) { - uint64_t count; - ASSERT(0 == zap_count(mos, - ds->ds_phys->ds_next_clones_obj, &count) && count == 0); - VERIFY(0 == dmu_object_free(mos, - ds->ds_phys->ds_next_clones_obj, tx)); - } - if (ds->ds_phys->ds_props_obj != 0) - VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); - if (ds->ds_phys->ds_userrefs_obj != 0) - VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); - dsl_dir_close(ds->ds_dir, ds); - ds->ds_dir = NULL; - dsl_dataset_drain_refs(ds, tag); - VERIFY(0 == dmu_object_free(mos, obj, tx)); - - if (dsda->rm_origin) { - /* - * Remove the origin of the clone we just destroyed. - */ - struct dsl_ds_destroyarg ndsda = {0}; - - ndsda.ds = dsda->rm_origin; - dsl_dataset_destroy_sync(&ndsda, tag, tx); + for (int t = 0; t < TXG_SIZE; t++) { + if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, + ds, t)) + return (B_TRUE); } + return (B_FALSE); } static int @@ -2047,10 +972,10 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); + return (SET_ERROR(ENOSPC)); /* - * Propogate any reserved space for this snapshot to other + * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) @@ -2059,58 +984,115 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) return (0); } +typedef struct dsl_dataset_snapshot_arg { + nvlist_t *ddsa_snaps; + nvlist_t *ddsa_props; + nvlist_t *ddsa_errors; +} dsl_dataset_snapshot_arg_t; + int -dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; - int err; + int error; uint64_t value; + ds->ds_trysnap_txg = tx->tx_txg; + + if (!dmu_tx_is_syncing(tx)) + return (0); + /* * We don't allow multiple snapshots of the same txg. If there * is already one, try again. */ if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) - return (EAGAIN); - - /* - * Check for conflicting name snapshot name. - */ - err = dsl_dataset_snap_lookup(ds, snapname, &value); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); + return (SET_ERROR(EAGAIN)); /* - * Check that the dataset's name is not too long. Name consists - * of the dataset's length + 1 for the @-sign + snapshot name's length + * Check for conflicting snapshot name. */ - if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) - return (ENAMETOOLONG); + error = dsl_dataset_snap_lookup(ds, snapname, &value); + if (error == 0) + return (SET_ERROR(EEXIST)); + if (error != ENOENT) + return (error); - err = dsl_dataset_snapshot_reserve_space(ds, tx); - if (err) - return (err); + error = dsl_dataset_snapshot_reserve_space(ds, tx); + if (error != 0) + return (error); - ds->ds_trysnap_txg = tx->tx_txg; return (0); } +static int +dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_arg_t *ddsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; + + for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { + int error = 0; + dsl_dataset_t *ds; + char *name, *atp; + char dsname[MAXNAMELEN]; + + name = nvpair_name(pair); + if (strlen(name) >= MAXNAMELEN) + error = SET_ERROR(ENAMETOOLONG); + if (error == 0) { + atp = strchr(name, '@'); + if (atp == NULL) + error = SET_ERROR(EINVAL); + if (error == 0) + (void) strlcpy(dsname, name, atp - name + 1); + } + if (error == 0) + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == 0) { + error = dsl_dataset_snapshot_check_impl(ds, + atp + 1, tx); + dsl_dataset_rele(ds, FTAG); + } + + if (error != 0) { + if (ddsa->ddsa_errors != NULL) { + fnvlist_add_int32(ddsa->ddsa_errors, + name, error); + } + rv = error; + } + } + return (rv); +} + void -dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; + static zil_header_t zero_zil; + dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj, crtxg; objset_t *mos = dp->dp_meta_objset; - int err; + objset_t *os; + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + /* + * If we are on an old pool, the zil must not be active, in which + * case it will be zeroed. Usually zil_suspend() accomplishes this. + */ + ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || + dmu_objset_from_ds(ds, &os) != 0 || + bcmp(&os->os_phys->os_zil_header, &zero_zil, + sizeof (zero_zil)) == 0); - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); /* * The origin's ds_creation_txg has to be < TXG_INITIAL @@ -2122,7 +1104,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); - VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); @@ -2146,74 +1128,272 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); - ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); - if (ds->ds_prev) { - uint64_t next_clones_obj = - ds->ds_prev->ds_phys->ds_next_clones_obj; - ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == - ds->ds_object || - ds->ds_prev->ds_phys->ds_num_children > 1); - if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { - dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds->ds_prev->ds_phys->ds_creation_txg); - ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; - } else if (next_clones_obj != 0) { - remove_from_next_clones(ds->ds_prev, - dsphys->ds_next_snap_obj, tx); - VERIFY3U(0, ==, zap_add_int(mos, - next_clones_obj, dsobj, tx)); + ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); + if (ds->ds_prev) { + uint64_t next_clones_obj = + ds->ds_prev->ds_phys->ds_next_clones_obj; + ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == + ds->ds_object || + ds->ds_prev->ds_phys->ds_num_children > 1); + if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, + ds->ds_prev->ds_phys->ds_creation_txg); + ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; + } else if (next_clones_obj != 0) { + dsl_dataset_remove_from_next_clones(ds->ds_prev, + dsphys->ds_next_snap_obj, tx); + VERIFY0(zap_add_int(mos, + next_clones_obj, dsobj, tx)); + } + } + + /* + * If we have a reference-reservation on this dataset, we will + * need to increase the amount of refreservation being charged + * since our unique space is going to zero. + */ + if (ds->ds_reserved) { + int64_t delta; + ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); + delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, + delta, 0, 0, tx); + } + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, + UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_add_key(&ds->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, tx); + + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); + ds->ds_phys->ds_prev_snap_obj = dsobj; + ds->ds_phys->ds_prev_snap_txg = crtxg; + ds->ds_phys->ds_unique_bytes = 0; + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) + ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; + + VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, + snapname, 8, 1, &dsobj, tx)); + + if (ds->ds_prev) + dsl_dataset_rele(ds->ds_prev, ds); + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + + dsl_scan_ds_snapshotted(ds, tx); + + dsl_dir_snap_cmtime_update(ds->ds_dir); + + spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); +} + +static void +dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_arg_t *ddsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { + dsl_dataset_t *ds; + char *name, *atp; + char dsname[MAXNAMELEN]; + + name = nvpair_name(pair); + atp = strchr(name, '@'); + (void) strlcpy(dsname, name, atp - name + 1); + VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); + + dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); + if (ddsa->ddsa_props != NULL) { + dsl_props_set_sync_impl(ds->ds_prev, + ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); + } + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * The snapshots must all be in the same pool. + * All-or-nothing: if there are any failures, nothing will be modified. + */ +int +dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) +{ + dsl_dataset_snapshot_arg_t ddsa; + nvpair_t *pair; + boolean_t needsuspend; + int error; + spa_t *spa; + char *firstname; + nvlist_t *suspended = NULL; + + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + firstname = nvpair_name(pair); + + error = spa_open(firstname, &spa, FTAG); + if (error != 0) + return (error); + needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + spa_close(spa, FTAG); + + if (needsuspend) { + suspended = fnvlist_alloc(); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char fsname[MAXNAMELEN]; + char *snapname = nvpair_name(pair); + char *atp; + void *cookie; + + atp = strchr(snapname, '@'); + if (atp == NULL) { + error = SET_ERROR(EINVAL); + break; + } + (void) strlcpy(fsname, snapname, atp - snapname + 1); + + error = zil_suspend(fsname, &cookie); + if (error != 0) + break; + fnvlist_add_uint64(suspended, fsname, + (uintptr_t)cookie); + } + } + + ddsa.ddsa_snaps = snaps; + ddsa.ddsa_props = props; + ddsa.ddsa_errors = errors; + + if (error == 0) { + error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, + dsl_dataset_snapshot_sync, &ddsa, + fnvlist_num_pairs(snaps) * 3); + } + + if (suspended != NULL) { + for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; + pair = nvlist_next_nvpair(suspended, pair)) { + zil_resume((void *)(uintptr_t) + fnvpair_value_uint64(pair)); } + fnvlist_free(suspended); } - /* - * If we have a reference-reservation on this dataset, we will - * need to increase the amount of refreservation being charged - * since our unique space is going to zero. - */ - if (ds->ds_reserved) { - int64_t delta; - ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); - delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, - delta, 0, 0, tx); +#ifdef __FreeBSD__ +#ifdef _KERNEL + if (error == 0) { + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *snapname = nvpair_name(pair); + zvol_create_minors(snapname); + } } +#endif +#endif + return (error); +} - dmu_buf_will_dirty(ds->ds_dbuf, tx); - zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", - ds->ds_dir->dd_myname, snapname, dsobj, - ds->ds_phys->ds_prev_snap_txg); - ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, - UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); - dsl_deadlist_add_key(&ds->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, tx); +typedef struct dsl_dataset_snapshot_tmp_arg { + const char *ddsta_fsname; + const char *ddsta_snapname; + minor_t ddsta_cleanup_minor; + const char *ddsta_htag; +} dsl_dataset_snapshot_tmp_arg_t; - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); - ds->ds_phys->ds_prev_snap_obj = dsobj; - ds->ds_phys->ds_prev_snap_txg = crtxg; - ds->ds_phys->ds_unique_bytes = 0; - if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; +static int +dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; - err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, - snapname, 8, 1, &dsobj, tx); - ASSERT(err == 0); + error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); + if (error != 0) + return (error); - if (ds->ds_prev) - dsl_dataset_drop_ref(ds->ds_prev, ds); - VERIFY(0 == dsl_dataset_get_ref(dp, - ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, tx); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } - dsl_scan_ds_snapshotted(ds, tx); + if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENOTSUP)); + } + error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, + B_TRUE, tx); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } - dsl_dir_snap_cmtime_update(ds->ds_dir); + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); + + dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); + dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, + ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); + dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); + + dsl_dataset_rele(ds, FTAG); +} - spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, - "dataset = %llu", dsobj); +int +dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, + minor_t cleanup_minor, const char *htag) +{ + dsl_dataset_snapshot_tmp_arg_t ddsta; + int error; + spa_t *spa; + boolean_t needsuspend; + void *cookie; + + ddsta.ddsta_fsname = fsname; + ddsta.ddsta_snapname = snapname; + ddsta.ddsta_cleanup_minor = cleanup_minor; + ddsta.ddsta_htag = htag; + + error = spa_open(fsname, &spa, FTAG); + if (error != 0) + return (error); + needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + spa_close(spa, FTAG); + + if (needsuspend) { + error = zil_suspend(fsname, &cookie); + if (error != 0) + return (error); + } + + error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, + dsl_dataset_snapshot_tmp_sync, &ddsta, 3); + + if (needsuspend) + zil_resume(cookie); + return (error); } + void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { @@ -2238,66 +1418,65 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; - nvlist_t *propval; - nvlist_t *val; + nvlist_t *propval = fnvlist_alloc(); + nvlist_t *val = fnvlist_alloc(); - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); /* - * There may me missing entries in ds_next_clones_obj + * There may be missing entries in ds_next_clones_obj * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. */ if (ds->ds_phys->ds_next_clones_obj != 0) { - ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, + ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count)); } - if (count != ds->ds_phys->ds_num_children - 1) { + if (count != ds->ds_phys->ds_num_children - 1) goto fail; - } for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; char buf[ZFS_MAXNAMELEN]; - /* - * Even though we hold the dp_config_rwlock, the dataset - * may fail to open, returning ENOENT. If there is a - * thread concurrently attempting to destroy this - * dataset, it will have the ds_rwlock held for - * RW_WRITER. Our call to dsl_dataset_hold_obj() -> - * dsl_dataset_hold_ref() will fail its - * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the - * dp_config_rwlock, and wait for the destroy progress - * and signal ds_exclusive_cv. If the destroy was - * successful, we will see that - * DSL_DATASET_IS_DESTROYED(), and return ENOENT. - */ - if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone) != 0) - continue; + VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); dsl_dir_name(clone->ds_dir, buf); - VERIFY(nvlist_add_boolean(val, buf) == 0); + fnvlist_add_boolean(val, buf); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); - VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); - VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), - propval) == 0); + fnvlist_add_nvlist(propval, ZPROP_VALUE, val); + fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); fail: nvlist_free(val); nvlist_free(propval); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; uint64_t refd, avail, uobjs, aobjs, ratio; - dsl_dir_stats(ds->ds_dir, nv); + ASSERT(dsl_pool_config_held(dp)); + + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, + ds->ds_phys->ds_uncompressed_bytes); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + get_clones_stat(ds, nv); + } else { + dsl_dir_stats(ds->ds_dir, nv); + } dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2327,10 +1506,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_dataset_t *prev; - rw_enter(&dp->dp_config_rwlock, RW_READER); int err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); - rw_exit(&dp->dp_config_rwlock); if (err == 0) { err = dsl_dataset_space_written(prev, ds, &written, &comp, &uncomp); @@ -2341,53 +1518,34 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } } - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, - ds->ds_phys->ds_uncompressed_bytes); - - if (ds->ds_phys->ds_next_snap_obj) { - /* - * This is a snapshot; override the dd's space used with - * our unique space and compression ratio. - */ - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - - get_clones_stat(ds, nv); - } } void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + ASSERT(dsl_pool_config_held(dp)); + stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; - if (ds->ds_phys->ds_next_snap_obj) { + stat->dds_origin[0] = '\0'; + if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; - } - /* clone origin is really a dsl_dir thing... */ - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); - } else { - stat->dds_origin[0] = '\0'; + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_rele(ods, FTAG); + } } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t @@ -2424,8 +1582,7 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) { dsl_pool_t *dp = ds->ds_dir->dd_pool; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); if (ds->ds_prev == NULL) return (B_FALSE); if (ds->ds_phys->ds_bp.blk_birth > @@ -2447,249 +1604,245 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) return (B_FALSE); } +typedef struct dsl_dataset_rename_snapshot_arg { + const char *ddrsa_fsname; + const char *ddrsa_oldsnapname; + const char *ddrsa_newsnapname; + boolean_t ddrsa_recursive; + dmu_tx_t *ddrsa_tx; +} dsl_dataset_rename_snapshot_arg_t; + /* ARGSUSED */ static int -dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, + dsl_dataset_t *hds, void *arg) { - dsl_dataset_t *ds = arg1; - char *newsnapname = arg2; - dsl_dir_t *dd = ds->ds_dir; - dsl_dataset_t *hds; + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + int error; uint64_t val; - int err; - - err = dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); - if (err) - return (err); - /* new name better not be in use */ - err = dsl_dataset_snap_lookup(hds, newsnapname, &val); - dsl_dataset_rele(hds, FTAG); + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); + if (error != 0) { + /* ignore nonexistent snapshots */ + return (error == ENOENT ? 0 : error); + } - if (err == 0) - err = EEXIST; - else if (err == ENOENT) - err = 0; + /* new name should not exist */ + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); + if (error == 0) + error = SET_ERROR(EEXIST); + else if (error == ENOENT) + error = 0; /* dataset name + 1 for the "@" + the new snapshot name must fit */ - if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) - err = ENAMETOOLONG; + if (dsl_dir_namelen(hds->ds_dir) + 1 + + strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) + error = SET_ERROR(ENAMETOOLONG); - return (err); + return (error); } -static void -dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +static int +dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) { - char oldname[MAXPATHLEN], newname[MAXPATHLEN]; - dsl_dataset_t *ds = arg1; - const char *newsnapname = arg2; - dsl_dir_t *dd = ds->ds_dir; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds; - int err; + int error; + + error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); + if (error != 0) + return (error); + + if (ddrsa->ddrsa_recursive) { + error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, + dsl_dataset_rename_snapshot_check_impl, ddrsa, + DS_FIND_CHILDREN); + } else { + error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); + } + dsl_dataset_rele(hds, FTAG); + return (error); +} + +static int +dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, + dsl_dataset_t *hds, void *arg) +{ +#ifdef __FreeBSD__ +#ifdef _KERNEL + char *oldname, *newname; +#endif +#endif + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_dataset_t *ds; + uint64_t val; + dmu_tx_t *tx = ddrsa->ddrsa_tx; + int error; - ASSERT(ds->ds_phys->ds_next_snap_obj != 0); + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); + ASSERT(error == 0 || error == ENOENT); + if (error == ENOENT) { + /* ignore nonexistent snapshots */ + return (0); + } - VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); - VERIFY(0 == dsl_dataset_get_snapname(ds)); - err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); - ASSERT0(err); - dsl_dataset_name(ds, oldname); + /* log before we change the name */ + spa_history_log_internal_ds(ds, "rename", tx, + "-> @%s", ddrsa->ddrsa_newsnapname); + + VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx)); mutex_enter(&ds->ds_lock); - (void) strcpy(ds->ds_snapname, newsnapname); + (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); mutex_exit(&ds->ds_lock); - err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, - ds->ds_snapname, 8, 1, &ds->ds_object, tx); - ASSERT0(err); - dsl_dataset_name(ds, newname); + VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, + ds->ds_snapname, 8, 1, &ds->ds_object, tx)); + +#ifdef __FreeBSD__ #ifdef _KERNEL + oldname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + newname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + snprintf(oldname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_oldsnapname); + snprintf(newname, MAXPATHLEN, "%s@%s", ddrsa->ddrsa_fsname, + ddrsa->ddrsa_newsnapname); + zfsvfs_update_fromname(oldname, newname); zvol_rename_minors(oldname, newname); + kmem_free(newname, MAXPATHLEN); + kmem_free(oldname, MAXPATHLEN); +#endif #endif + dsl_dataset_rele(ds, FTAG); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, - "dataset = %llu", ds->ds_object); - dsl_dataset_rele(hds, FTAG); + return (0); } -struct renamesnaparg { - dsl_sync_task_group_t *dstg; - char failed[MAXPATHLEN]; - char *oldsnap; - char *newsnap; - int error; -}; - -static int -dsl_snapshot_rename_one(const char *name, void *arg) +static void +dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) { - struct renamesnaparg *ra = arg; - dsl_dataset_t *ds = NULL; - char *snapname; - int err; - - snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); - (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; - /* - * For recursive snapshot renames the parent won't be changing - * so we just pass name for both the to/from argument. - */ - err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); - if (err != 0) { - strfree(snapname); - return (err == ENOENT ? 0 : err); + VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); + ddrsa->ddrsa_tx = tx; + if (ddrsa->ddrsa_recursive) { + VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, + dsl_dataset_rename_snapshot_sync_impl, ddrsa, + DS_FIND_CHILDREN)); + } else { + VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); } + dsl_dataset_rele(hds, FTAG); +} -#ifdef _KERNEL - /* - * For all filesystems undergoing rename, we'll need to unmount it. - */ - (void) zfs_unmount_snap(snapname, NULL); -#endif - err = dsl_dataset_hold(snapname, ra->dstg, &ds); - strfree(snapname); - if (err != 0) - return (err == ENOENT ? 0 : err); - - dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, - dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); +int +dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive) +{ + dsl_dataset_rename_snapshot_arg_t ddrsa; - /* First successful rename clears the error. */ - ra->error = 0; + ddrsa.ddrsa_fsname = fsname; + ddrsa.ddrsa_oldsnapname = oldsnapname; + ddrsa.ddrsa_newsnapname = newsnapname; + ddrsa.ddrsa_recursive = recursive; - return (0); + return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, + dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); } static int -dsl_recursive_rename(char *oldname, const char *newname) +dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) { - int err; - struct renamesnaparg *ra; - dsl_sync_task_t *dst; - spa_t *spa; - char *cp, *fsname = spa_strdup(oldname); - int len = strlen(oldname) + 1; + const char *fsname = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int64_t unused_refres_delta; + int error; - /* truncate the snapshot name to get the fsname */ - cp = strchr(fsname, '@'); - *cp = '\0'; + error = dsl_dataset_hold(dp, fsname, FTAG, &ds); + if (error != 0) + return (error); - err = spa_open(fsname, &spa, FTAG); - if (err) { - kmem_free(fsname, len); - return (err); + /* must not be a snapshot */ + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); } - ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); - ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - - ra->oldsnap = strchr(oldname, '@') + 1; - ra->newsnap = strchr(newname, '@') + 1; - *ra->failed = '\0'; - ra->error = ENOENT; - err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, - DS_FIND_CHILDREN); - kmem_free(fsname, len); - if (err == 0) - err = ra->error; - - if (err == 0) - err = dsl_sync_task_group_wait(ra->dstg); - - for (dst = list_head(&ra->dstg->dstg_tasks); dst; - dst = list_next(&ra->dstg->dstg_tasks, dst)) { - dsl_dataset_t *ds = dst->dst_arg1; - if (dst->dst_err) { - dsl_dir_name(ds->ds_dir, ra->failed); - (void) strlcat(ra->failed, "@", sizeof (ra->failed)); - (void) strlcat(ra->failed, ra->newsnap, - sizeof (ra->failed)); - } - dsl_dataset_rele(ds, ra->dstg); + /* must have a most recent snapshot */ + if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); } - if (err) - (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); + if (dsl_dataset_long_held(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EBUSY)); + } - dsl_sync_task_group_destroy(ra->dstg); - kmem_free(ra, sizeof (struct renamesnaparg)); - spa_close(spa, FTAG); - return (err); -} + /* + * Check if the snap we are rolling back to uses more than + * the refquota. + */ + if (ds->ds_quota != 0 && + ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EDQUOT)); + } -static int -dsl_valid_rename(const char *oldname, void *arg) -{ - int delta = *(int *)arg; + /* + * When we do the clone swap, we will temporarily use more space + * due to the refreservation (the head will no longer have any + * unique space, so the entire amount of the refreservation will need + * to be free). We will immediately destroy the clone, freeing + * this space, but the freeing happens over many txg's. + */ + unused_refres_delta = (int64_t)MIN(ds->ds_reserved, + ds->ds_phys->ds_unique_bytes); - if (strlen(oldname) + delta >= MAXNAMELEN) - return (ENAMETOOLONG); + if (unused_refres_delta > 0 && + unused_refres_delta > + dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENOSPC)); + } + dsl_dataset_rele(ds, FTAG); return (0); } -#pragma weak dmu_objset_rename = dsl_dataset_rename -int -dsl_dataset_rename(char *oldname, const char *newname, int flags) +static void +dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd; - dsl_dataset_t *ds; - const char *tail; - int err; - - err = dsl_dir_open(oldname, FTAG, &dd, &tail); - if (err) - return (err); - - if (tail == NULL) { - int delta = strlen(newname) - strlen(oldname); + const char *fsname = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds, *clone; + uint64_t cloneobj; - /* if we're growing, validate child name lengths */ - if (delta > 0) - err = dmu_objset_find(oldname, dsl_valid_rename, - &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds)); - if (err == 0) - err = dsl_dir_rename(dd, newname, flags); - dsl_dir_close(dd, FTAG); - return (err); - } - - if (tail[0] != '@') { - /* the name ended in a nonexistent component */ - dsl_dir_close(dd, FTAG); - return (ENOENT); - } - - dsl_dir_close(dd, FTAG); + cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", + ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); - /* new name must be snapshot in same filesystem */ - tail = strchr(newname, '@'); - if (tail == NULL) - return (EINVAL); - tail++; - if (strncmp(oldname, newname, tail - newname) != 0) - return (EXDEV); + VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); - if (flags & ZFS_RENAME_RECURSIVE) { - err = dsl_recursive_rename(oldname, newname); - } else { - err = dsl_dataset_hold(oldname, FTAG, &ds); - if (err) - return (err); + dsl_dataset_clone_swap_sync_impl(clone, ds, tx); + dsl_dataset_zero_zil(ds, tx); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_snapshot_rename_check, - dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); + dsl_destroy_head_sync_impl(clone, tx); - dsl_dataset_rele(ds, FTAG); - } + dsl_dataset_rele(clone, FTAG); + dsl_dataset_rele(ds, FTAG); +} - return (err); +int +dsl_dataset_rollback(const char *fsname) +{ + return (dsl_sync_task(fsname, dsl_dataset_rollback_check, + dsl_dataset_rollback_sync, (void *)fsname, 1)); } struct promotenode { @@ -2697,49 +1850,66 @@ struct promotenode { dsl_dataset_t *ds; }; -struct promotearg { +typedef struct dsl_dataset_promote_arg { + const char *ddpa_clonename; + dsl_dataset_t *ddpa_clone; list_t shared_snaps, origin_snaps, clone_snaps; - dsl_dataset_t *origin_origin; + dsl_dataset_t *origin_origin; /* origin of the origin */ uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; char *err_ds; -}; +} dsl_dataset_promote_arg_t; static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); -static boolean_t snaplist_unstable(list_t *l); +static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, + void *tag); +static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); static int -dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *hds = arg1; - struct promotearg *pa = arg2; - struct promotenode *snap = list_head(&pa->shared_snaps); - dsl_dataset_t *origin_ds = snap->ds; + dsl_dataset_promote_arg_t *ddpa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; + struct promotenode *snap; + dsl_dataset_t *origin_ds; int err; uint64_t unused; - /* Check that it is a real clone */ - if (!dsl_dir_is_clone(hds->ds_dir)) - return (EINVAL); + err = promote_hold(ddpa, dp, FTAG); + if (err != 0) + return (err); + + hds = ddpa->ddpa_clone; - /* Since this is so expensive, don't do the preliminary check */ - if (!dmu_tx_is_syncing(tx)) + if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { + promote_rele(ddpa, FTAG); + return (SET_ERROR(EXDEV)); + } + + /* + * Compute and check the amount of space to transfer. Since this is + * so expensive, don't do the preliminary check. + */ + if (!dmu_tx_is_syncing(tx)) { + promote_rele(ddpa, FTAG); return (0); + } - if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) - return (EXDEV); + snap = list_head(&ddpa->shared_snaps); + origin_ds = snap->ds; /* compute origin's new unique space */ - snap = list_tail(&pa->clone_snaps); + snap = list_tail(&ddpa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &pa->unique, &unused, &unused); + &ddpa->unique, &unused, &unused); /* * Walk the snapshots that we are moving * * Compute space to transfer. Consider the incremental changes - * to used for each snapshot: + * to used by each snapshot: * (my used) = (prev's used) + (blocks born) - (blocks killed) * So each snapshot gave birth to: * (blocks born) = (my used) - (prev's used) + (blocks killed) @@ -2750,19 +1920,29 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ - pa->used = origin_ds->ds_phys->ds_referenced_bytes; - pa->comp = origin_ds->ds_phys->ds_compressed_bytes; - pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; - for (snap = list_head(&pa->shared_snaps); snap; - snap = list_next(&pa->shared_snaps, snap)) { + ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; + ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; + ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; + for (snap = list_head(&ddpa->shared_snaps); snap; + snap = list_next(&ddpa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; dsl_dataset_t *ds = snap->ds; + /* + * If there are long holds, we won't be able to evict + * the objset. + */ + if (dsl_dataset_long_held(ds)) { + err = SET_ERROR(EBUSY); + goto out; + } + /* Check that the snapshot name does not conflict */ - VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { - err = EEXIST; + (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); + err = SET_ERROR(EEXIST); goto out; } if (err != ENOENT) @@ -2774,26 +1954,27 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - pa->used += dlused; - pa->comp += dlcomp; - pa->uncomp += dluncomp; + ddpa->used += dlused; + ddpa->comp += dlcomp; + ddpa->uncomp += dluncomp; } /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ - if (pa->origin_origin) { - pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; - pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; - pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; + if (ddpa->origin_origin) { + ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; + ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; + ddpa->uncomp -= + ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; } /* Check that there is enough space here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, - pa->used); - if (err) - return (err); + ddpa->used); + if (err != 0) + goto out; /* * Compute the amounts of space that will be used by snapshots @@ -2811,68 +1992,75 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * calls will be fast because they do not have to * iterate over all bps. */ - snap = list_head(&pa->origin_snaps); - err = snaplist_space(&pa->shared_snaps, - snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); - if (err) - return (err); + snap = list_head(&ddpa->origin_snaps); + err = snaplist_space(&ddpa->shared_snaps, + snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); + if (err != 0) + goto out; - err = snaplist_space(&pa->clone_snaps, + err = snaplist_space(&ddpa->clone_snaps, snap->ds->ds_dir->dd_origin_txg, &space); - if (err) - return (err); - pa->cloneusedsnap += space; + if (err != 0) + goto out; + ddpa->cloneusedsnap += space; } if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { - err = snaplist_space(&pa->origin_snaps, - origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); - if (err) - return (err); + err = snaplist_space(&ddpa->origin_snaps, + origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); + if (err != 0) + goto out; } - return (0); out: - pa->err_ds = snap->ds->ds_snapname; + promote_rele(ddpa, FTAG); return (err); } static void -dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *hds = arg1; - struct promotearg *pa = arg2; - struct promotenode *snap = list_head(&pa->shared_snaps); - dsl_dataset_t *origin_ds = snap->ds; + dsl_dataset_promote_arg_t *ddpa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; + struct promotenode *snap; + dsl_dataset_t *origin_ds; dsl_dataset_t *origin_head; - dsl_dir_t *dd = hds->ds_dir; - dsl_pool_t *dp = hds->ds_dir->dd_pool; + dsl_dir_t *dd; dsl_dir_t *odd = NULL; uint64_t oldnext_obj; int64_t delta; - ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); + VERIFY0(promote_hold(ddpa, dp, FTAG)); + hds = ddpa->ddpa_clone; - snap = list_head(&pa->origin_snaps); + ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); + + snap = list_head(&ddpa->shared_snaps); + origin_ds = snap->ds; + dd = hds->ds_dir; + + snap = list_head(&ddpa->origin_snaps); origin_head = snap->ds; /* * We need to explicitly open odd, since origin_ds's dd will be * changing. */ - VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, + VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; - snap = list_tail(&pa->clone_snaps); + snap = list_tail(&ddpa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ if (origin_ds->ds_phys->ds_next_clones_obj) { - remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + dsl_dataset_remove_from_next_clones(origin_ds, + snap->ds->ds_object, tx); + VERIFY0(zap_add_int(dp->dp_meta_objset, origin_ds->ds_phys->ds_next_clones_obj, oldnext_obj, tx)); } @@ -2889,39 +2077,43 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + VERIFY0(zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, hds->ds_object, tx)); - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, - pa->origin_origin->ds_dir->dd_phys->dd_clones, + VERIFY0(zap_add_int(dp->dp_meta_objset, + ddpa->origin_origin->ds_dir->dd_phys->dd_clones, hds->ds_object, tx)); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, - pa->origin_origin->ds_dir->dd_phys->dd_clones, + VERIFY0(zap_remove_int(dp->dp_meta_objset, + ddpa->origin_origin->ds_dir->dd_phys->dd_clones, origin_head->ds_object, tx)); if (dd->dd_phys->dd_clones == 0) { dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + VERIFY0(zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, origin_head->ds_object, tx)); - } /* move snapshots to this dir */ - for (snap = list_head(&pa->shared_snaps); snap; - snap = list_next(&pa->shared_snaps, snap)) { + for (snap = list_head(&ddpa->shared_snaps); snap; + snap = list_next(&ddpa->shared_snaps, snap)) { dsl_dataset_t *ds = snap->ds; - /* unregister props as dsl_dir is changing */ + /* + * Property callbacks are registered to a particular + * dsl_dir. Since ours is changing, evict the objset + * so that they will be unregistered from the old dsl_dir. + */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } + /* move snap name entry */ - VERIFY(0 == dsl_dataset_get_snapname(ds)); - VERIFY(0 == dsl_dataset_snap_remove(origin_head, + VERIFY0(dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx)); - VERIFY(0 == zap_add(dp->dp_meta_objset, + VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); @@ -2930,8 +2122,8 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, odd); - dsl_dir_close(ds->ds_dir, ds); - VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, + dsl_dir_rele(ds->ds_dir, ds); + VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ @@ -2955,1259 +2147,674 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) continue; } - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &cnds)); - o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; - - VERIFY3U(zap_remove_int(dp->dp_meta_objset, - odd->dd_phys->dd_clones, o, tx), ==, 0); - VERIFY3U(zap_add_int(dp->dp_meta_objset, - dd->dd_phys->dd_clones, o, tx), ==, 0); - dsl_dataset_rele(cnds, FTAG); - } - zap_cursor_fini(&zc); - } - - ASSERT0(dsl_prop_numcb(ds)); - } - - /* - * Change space accounting. - * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either - * both be valid, or both be 0 (resulting in delta == 0). This - * is true for each of {clone,origin} independently. - */ - - delta = pa->cloneusedsnap - - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; - ASSERT3S(delta, >=, 0); - ASSERT3U(pa->used, >=, delta); - dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); - dsl_dir_diduse_space(dd, DD_USED_HEAD, - pa->used - delta, pa->comp, pa->uncomp, tx); - - delta = pa->originusedsnap - - odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; - ASSERT3S(delta, <=, 0); - ASSERT3U(pa->used, >=, -delta); - dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); - dsl_dir_diduse_space(odd, DD_USED_HEAD, - -pa->used - delta, -pa->comp, -pa->uncomp, tx); - - origin_ds->ds_phys->ds_unique_bytes = pa->unique; - - /* log history record */ - spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, - "dataset = %llu", hds->ds_object); - - dsl_dir_close(odd, FTAG); -} - -static char *snaplist_tag = "snaplist"; -/* - * Make a list of dsl_dataset_t's for the snapshots between first_obj - * (exclusive) and last_obj (inclusive). The list will be in reverse - * order (last_obj will be the list_head()). If first_obj == 0, do all - * snapshots back to this dataset's origin. - */ -static int -snaplist_make(dsl_pool_t *dp, boolean_t own, - uint64_t first_obj, uint64_t last_obj, list_t *l) -{ - uint64_t obj = last_obj; - - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); - - list_create(l, sizeof (struct promotenode), - offsetof(struct promotenode, link)); - - while (obj != first_obj) { - dsl_dataset_t *ds; - struct promotenode *snap; - int err; - - if (own) { - err = dsl_dataset_own_obj(dp, obj, - 0, snaplist_tag, &ds); - if (err == 0) - dsl_dataset_make_exclusive(ds, snaplist_tag); - } else { - err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); - } - if (err == ENOENT) { - /* lost race with snapshot destroy */ - struct promotenode *last = list_tail(l); - ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); - obj = last->ds->ds_phys->ds_prev_snap_obj; - continue; - } else if (err) { - return (err); - } - - if (first_obj == 0) - first_obj = ds->ds_dir->dd_phys->dd_origin_obj; - - snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); - snap->ds = ds; - list_insert_tail(l, snap); - obj = ds->ds_phys->ds_prev_snap_obj; - } - - return (0); -} - -static int -snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) -{ - struct promotenode *snap; - - *spacep = 0; - for (snap = list_head(l); snap; snap = list_next(l, snap)) { - uint64_t used, comp, uncomp; - dsl_deadlist_space_range(&snap->ds->ds_deadlist, - mintxg, UINT64_MAX, &used, &comp, &uncomp); - *spacep += used; - } - return (0); -} - -static void -snaplist_destroy(list_t *l, boolean_t own) -{ - struct promotenode *snap; - - if (!l || !list_link_active(&l->list_head)) - return; - - while ((snap = list_tail(l)) != NULL) { - list_remove(l, snap); - if (own) - dsl_dataset_disown(snap->ds, snaplist_tag); - else - dsl_dataset_rele(snap->ds, snaplist_tag); - kmem_free(snap, sizeof (struct promotenode)); - } - list_destroy(l); -} - -/* - * Promote a clone. Nomenclature note: - * "clone" or "cds": the original clone which is being promoted - * "origin" or "ods": the snapshot which is originally clone's origin - * "origin head" or "ohds": the dataset which is the head - * (filesystem/volume) for the origin - * "origin origin": the origin of the origin's filesystem (typically - * NULL, indicating that the clone is not a clone of a clone). - */ -int -dsl_dataset_promote(const char *name, char *conflsnap) -{ - dsl_dataset_t *ds; - dsl_dir_t *dd; - dsl_pool_t *dp; - dmu_object_info_t doi; - struct promotearg pa = { 0 }; - struct promotenode *snap; - int err; - - err = dsl_dataset_hold(name, FTAG, &ds); - if (err) - return (err); - dd = ds->ds_dir; - dp = dd->dd_pool; - - err = dmu_object_info(dp->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, &doi); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } - - if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* - * We are going to inherit all the snapshots taken before our - * origin (i.e., our new origin will be our parent's origin). - * Take ownership of them so that we can rename them into our - * namespace. - */ - rw_enter(&dp->dp_config_rwlock, RW_READER); - - err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, - &pa.shared_snaps); - if (err != 0) - goto out; - - err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); - if (err != 0) - goto out; - - snap = list_head(&pa.shared_snaps); - ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); - err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, - snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); - if (err != 0) - goto out; - - if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { - err = dsl_dataset_hold_obj(dp, - snap->ds->ds_dir->dd_phys->dd_origin_obj, - FTAG, &pa.origin_origin); - if (err != 0) - goto out; - } - -out: - rw_exit(&dp->dp_config_rwlock); - - /* - * Add in 128x the snapnames zapobj size, since we will be moving - * a bunch of snapnames to the promoted ds, and dirtying their - * bonus buffers. - */ - if (err == 0) { - err = dsl_sync_task_do(dp, dsl_dataset_promote_check, - dsl_dataset_promote_sync, ds, &pa, - 2 + 2 * doi.doi_physical_blocks_512); - if (err && pa.err_ds && conflsnap) - (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); - } - - snaplist_destroy(&pa.shared_snaps, B_TRUE); - snaplist_destroy(&pa.clone_snaps, B_FALSE); - snaplist_destroy(&pa.origin_snaps, B_FALSE); - if (pa.origin_origin) - dsl_dataset_rele(pa.origin_origin, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); -} - -struct cloneswaparg { - dsl_dataset_t *cds; /* clone dataset */ - dsl_dataset_t *ohds; /* origin's head dataset */ - boolean_t force; - int64_t unused_refres_delta; /* change in unconsumed refreservation */ -}; - -/* ARGSUSED */ -static int -dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - struct cloneswaparg *csa = arg1; - - /* they should both be heads */ - if (dsl_dataset_is_snapshot(csa->cds) || - dsl_dataset_is_snapshot(csa->ohds)) - return (EINVAL); - - /* the branch point should be just before them */ - if (csa->cds->ds_prev != csa->ohds->ds_prev) - return (EINVAL); - - /* cds should be the clone (unless they are unrelated) */ - if (csa->cds->ds_prev != NULL && - csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && - csa->ohds->ds_object != - csa->cds->ds_prev->ds_phys->ds_next_snap_obj) - return (EINVAL); - - /* the clone should be a child of the origin */ - if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) - return (EINVAL); - - /* ohds shouldn't be modified unless 'force' */ - if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) - return (ETXTBSY); - - /* adjust amount of any unconsumed refreservation */ - csa->unused_refres_delta = - (int64_t)MIN(csa->ohds->ds_reserved, - csa->ohds->ds_phys->ds_unique_bytes) - - (int64_t)MIN(csa->ohds->ds_reserved, - csa->cds->ds_phys->ds_unique_bytes); - - if (csa->unused_refres_delta > 0 && - csa->unused_refres_delta > - dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); - - if (csa->ohds->ds_quota != 0 && - csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) - return (EDQUOT); - - return (0); -} - -/* ARGSUSED */ -static void -dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - struct cloneswaparg *csa = arg1; - dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; - - ASSERT(csa->cds->ds_reserved == 0); - ASSERT(csa->ohds->ds_quota == 0 || - csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); - - dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); - dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); - - if (csa->cds->ds_objset != NULL) { - dmu_objset_evict(csa->cds->ds_objset); - csa->cds->ds_objset = NULL; - } - - if (csa->ohds->ds_objset != NULL) { - dmu_objset_evict(csa->ohds->ds_objset); - csa->ohds->ds_objset = NULL; - } - - /* - * Reset origin's unique bytes, if it exists. - */ - if (csa->cds->ds_prev) { - dsl_dataset_t *origin = csa->cds->ds_prev; - uint64_t comp, uncomp; - - dmu_buf_will_dirty(origin->ds_dbuf, tx); - dsl_deadlist_space_range(&csa->cds->ds_deadlist, - origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); - } - - /* swap blkptrs */ - { - blkptr_t tmp; - tmp = csa->ohds->ds_phys->ds_bp; - csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; - csa->cds->ds_phys->ds_bp = tmp; - } - - /* set dd_*_bytes */ - { - int64_t dused, dcomp, duncomp; - uint64_t cdl_used, cdl_comp, cdl_uncomp; - uint64_t odl_used, odl_comp, odl_uncomp; - - ASSERT3U(csa->cds->ds_dir->dd_phys-> - dd_used_breakdown[DD_USED_SNAP], ==, 0); - - dsl_deadlist_space(&csa->cds->ds_deadlist, - &cdl_used, &cdl_comp, &cdl_uncomp); - dsl_deadlist_space(&csa->ohds->ds_deadlist, - &odl_used, &odl_comp, &odl_uncomp); - - dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used - - (csa->ohds->ds_phys->ds_referenced_bytes + odl_used); - dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - - (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); - duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + - cdl_uncomp - - (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); - - dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, - dused, dcomp, duncomp, tx); - dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, - -dused, -dcomp, -duncomp, tx); - - /* - * The difference in the space used by snapshots is the - * difference in snapshot space due to the head's - * deadlist (since that's the only thing that's - * changing that affects the snapused). - */ - dsl_deadlist_space_range(&csa->cds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, - &cdl_used, &cdl_comp, &cdl_uncomp); - dsl_deadlist_space_range(&csa->ohds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, - &odl_used, &odl_comp, &odl_uncomp); - dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, - DD_USED_HEAD, DD_USED_SNAP, tx); - } - - /* swap ds_*_bytes */ - SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes, - csa->cds->ds_phys->ds_referenced_bytes); - SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, - csa->cds->ds_phys->ds_compressed_bytes); - SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, - csa->cds->ds_phys->ds_uncompressed_bytes); - SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, - csa->cds->ds_phys->ds_unique_bytes); + VERIFY0(dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &cnds)); + o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; - /* apply any parent delta for change in unconsumed refreservation */ - dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, - csa->unused_refres_delta, 0, 0, tx); + VERIFY0(zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, o, tx)); + VERIFY0(zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, o, tx)); + dsl_dataset_rele(cnds, FTAG); + } + zap_cursor_fini(&zc); + } + + ASSERT(!dsl_prop_hascb(ds)); + } /* - * Swap deadlists. + * Change space accounting. + * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either + * both be valid, or both be 0 (resulting in delta == 0). This + * is true for each of {clone,origin} independently. */ - dsl_deadlist_close(&csa->cds->ds_deadlist); - dsl_deadlist_close(&csa->ohds->ds_deadlist); - SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, - csa->cds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, - csa->cds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, - csa->ohds->ds_phys->ds_deadlist_obj); - - dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); -} -/* - * Swap 'clone' with its origin head datasets. Used at the end of "zfs - * recv" into an existing fs to swizzle the file system to the new - * version, and by "zfs rollback". Can also be used to swap two - * independent head datasets if neither has any snapshots. - */ -int -dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, - boolean_t force) -{ - struct cloneswaparg csa; - int error; + delta = ddpa->cloneusedsnap - + dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, >=, 0); + ASSERT3U(ddpa->used, >=, delta); + dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(dd, DD_USED_HEAD, + ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); - ASSERT(clone->ds_owner); - ASSERT(origin_head->ds_owner); -retry: - /* - * Need exclusive access for the swap. If we're swapping these - * datasets back after an error, we already hold the locks. - */ - if (!RW_WRITE_HELD(&clone->ds_rwlock)) - rw_enter(&clone->ds_rwlock, RW_WRITER); - if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && - !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { - rw_exit(&clone->ds_rwlock); - rw_enter(&origin_head->ds_rwlock, RW_WRITER); - if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { - rw_exit(&origin_head->ds_rwlock); - goto retry; - } - } - csa.cds = clone; - csa.ohds = origin_head; - csa.force = force; - error = dsl_sync_task_do(clone->ds_dir->dd_pool, - dsl_dataset_clone_swap_check, - dsl_dataset_clone_swap_sync, &csa, NULL, 9); - return (error); -} + delta = ddpa->originusedsnap - + odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, <=, 0); + ASSERT3U(ddpa->used, >=, -delta); + dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(odd, DD_USED_HEAD, + -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); -/* - * Given a pool name and a dataset object number in that pool, - * return the name of that dataset. - */ -int -dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) -{ - spa_t *spa; - dsl_pool_t *dp; - dsl_dataset_t *ds; - int error; + origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; - if ((error = spa_open(pname, &spa, FTAG)) != 0) - return (error); - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { - dsl_dataset_name(ds, buf); - dsl_dataset_rele(ds, FTAG); - } - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); + /* log history record */ + spa_history_log_internal_ds(hds, "promote", tx, ""); - return (error); + dsl_dir_rele(odd, FTAG); + promote_rele(ddpa, FTAG); } -int -dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, - uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) +/* + * Make a list of dsl_dataset_t's for the snapshots between first_obj + * (exclusive) and last_obj (inclusive). The list will be in reverse + * order (last_obj will be the list_head()). If first_obj == 0, do all + * snapshots back to this dataset's origin. + */ +static int +snaplist_make(dsl_pool_t *dp, + uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) { - int error = 0; + uint64_t obj = last_obj; - ASSERT3S(asize, >, 0); + list_create(l, sizeof (struct promotenode), + offsetof(struct promotenode, link)); - /* - * *ref_rsrv is the portion of asize that will come from any - * unconsumed refreservation space. - */ - *ref_rsrv = 0; + while (obj != first_obj) { + dsl_dataset_t *ds; + struct promotenode *snap; + int err; - mutex_enter(&ds->ds_lock); - /* - * Make a space adjustment for reserved bytes. - */ - if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { - ASSERT3U(*used, >=, - ds->ds_reserved - ds->ds_phys->ds_unique_bytes); - *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); - *ref_rsrv = - asize - MIN(asize, parent_delta(ds, asize + inflight)); - } + err = dsl_dataset_hold_obj(dp, obj, tag, &ds); + ASSERT(err != ENOENT); + if (err != 0) + return (err); - if (!check_quota || ds->ds_quota == 0) { - mutex_exit(&ds->ds_lock); - return (0); - } - /* - * If they are requesting more space, and our current estimate - * is over quota, they get to try again unless the actual - * on-disk is over quota and there are no pending changes (which - * may free up space for us). - */ - if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { - if (inflight > 0 || - ds->ds_phys->ds_referenced_bytes < ds->ds_quota) - error = ERESTART; - else - error = EDQUOT; + if (first_obj == 0) + first_obj = ds->ds_dir->dd_phys->dd_origin_obj; + + snap = kmem_alloc(sizeof (*snap), KM_SLEEP); + snap->ds = ds; + list_insert_tail(l, snap); + obj = ds->ds_phys->ds_prev_snap_obj; } - mutex_exit(&ds->ds_lock); - return (error); + return (0); } -/* ARGSUSED */ static int -dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - int err; - - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) - return (ENOTSUP); - - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); - - if (psa->psa_effective_value == 0) - return (0); - - if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes || - psa->psa_effective_value < ds->ds_reserved) - return (ENOSPC); + struct promotenode *snap; + *spacep = 0; + for (snap = list_head(l); snap; snap = list_next(l, snap)) { + uint64_t used, comp, uncomp; + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + mintxg, UINT64_MAX, &used, &comp, &uncomp); + *spacep += used; + } return (0); } -extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); - -void -dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +static void +snaplist_destroy(list_t *l, void *tag) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; + struct promotenode *snap; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); + if (l == NULL || !list_link_active(&l->list_head)) + return; - if (ds->ds_quota != effective_value) { - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_quota = effective_value; + while ((snap = list_tail(l)) != NULL) { + list_remove(l, snap); + dsl_dataset_rele(snap->ds, tag); + kmem_free(snap, sizeof (*snap)); } + list_destroy(l); } -int -dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) +static int +promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) { - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; + int error; + dsl_dir_t *dd; + struct promotenode *snap; - dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); + error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, + &ddpa->ddpa_clone); + if (error != 0) + return (error); + dd = ddpa->ddpa_clone->ds_dir; - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); + if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || + !dsl_dir_is_clone(dd)) { + dsl_dataset_rele(ddpa->ddpa_clone, tag); + return (SET_ERROR(EINVAL)); + } - /* - * If someone removes a file, then tries to set the quota, we - * want to make sure the file freeing takes effect. - */ - txg_wait_open(ds->ds_dir->dd_pool, 0); + error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, + &ddpa->shared_snaps, tag); + if (error != 0) + goto out; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, - ds, &psa, 0); + error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, + &ddpa->clone_snaps, tag); + if (error != 0) + goto out; - dsl_dataset_rele(ds, FTAG); - return (err); + snap = list_head(&ddpa->shared_snaps); + ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); + error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, + snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, + &ddpa->origin_snaps, tag); + if (error != 0) + goto out; + + if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { + error = dsl_dataset_hold_obj(dp, + snap->ds->ds_dir->dd_phys->dd_origin_obj, + tag, &ddpa->origin_origin); + if (error != 0) + goto out; + } +out: + if (error != 0) + promote_rele(ddpa, tag); + return (error); } -static int -dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) +static void +promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value; - uint64_t unique; - int err; - - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < - SPA_VERSION_REFRESERVATION) - return (ENOTSUP); - - if (dsl_dataset_is_snapshot(ds)) - return (EINVAL); - - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + snaplist_destroy(&ddpa->shared_snaps, tag); + snaplist_destroy(&ddpa->clone_snaps, tag); + snaplist_destroy(&ddpa->origin_snaps, tag); + if (ddpa->origin_origin != NULL) + dsl_dataset_rele(ddpa->origin_origin, tag); + dsl_dataset_rele(ddpa->ddpa_clone, tag); +} - effective_value = psa->psa_effective_value; +/* + * Promote a clone. + * + * If it fails due to a conflicting snapshot name, "conflsnap" will be filled + * in with the name. (It must be at least MAXNAMELEN bytes long.) + */ +int +dsl_dataset_promote(const char *name, char *conflsnap) +{ + dsl_dataset_promote_arg_t ddpa = { 0 }; + uint64_t numsnaps; + int error; + objset_t *os; /* - * If we are doing the preliminary check in open context, the - * space estimates may be inaccurate. + * We will modify space proportional to the number of + * snapshots. Compute numsnaps. */ - if (!dmu_tx_is_syncing(tx)) - return (0); - - mutex_enter(&ds->ds_lock); - if (!DS_UNIQUE_IS_ACCURATE(ds)) - dsl_dataset_recalc_head_uniq(ds); - unique = ds->ds_phys->ds_unique_bytes; - mutex_exit(&ds->ds_lock); - - if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { - uint64_t delta = MAX(unique, effective_value) - - MAX(unique, ds->ds_reserved); + error = dmu_objset_hold(name, FTAG, &os); + if (error != 0) + return (error); + error = zap_count(dmu_objset_pool(os)->dp_meta_objset, + dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); + dmu_objset_rele(os, FTAG); + if (error != 0) + return (error); - if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); - if (ds->ds_quota > 0 && - effective_value > ds->ds_quota) - return (ENOSPC); - } + ddpa.ddpa_clonename = name; + ddpa.err_ds = conflsnap; - return (0); + return (dsl_sync_task(name, dsl_dataset_promote_check, + dsl_dataset_promote_sync, &ddpa, 2 + numsnaps)); } -static void -dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +int +dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, boolean_t force) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; - uint64_t unique; - int64_t delta; + int64_t unused_refres_delta; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); + /* they should both be heads */ + if (dsl_dataset_is_snapshot(clone) || + dsl_dataset_is_snapshot(origin_head)) + return (SET_ERROR(EINVAL)); - dmu_buf_will_dirty(ds->ds_dbuf, tx); + /* the branch point should be just before them */ + if (clone->ds_prev != origin_head->ds_prev) + return (SET_ERROR(EINVAL)); - mutex_enter(&ds->ds_dir->dd_lock); - mutex_enter(&ds->ds_lock); - ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); - unique = ds->ds_phys->ds_unique_bytes; - delta = MAX(0, (int64_t)(effective_value - unique)) - - MAX(0, (int64_t)(ds->ds_reserved - unique)); - ds->ds_reserved = effective_value; - mutex_exit(&ds->ds_lock); + /* clone should be the clone (unless they are unrelated) */ + if (clone->ds_prev != NULL && + clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && + origin_head->ds_object != + clone->ds_prev->ds_phys->ds_next_snap_obj) + return (SET_ERROR(EINVAL)); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); - mutex_exit(&ds->ds_dir->dd_lock); + /* the clone should be a child of the origin */ + if (clone->ds_dir->dd_parent != origin_head->ds_dir) + return (SET_ERROR(EINVAL)); + + /* origin_head shouldn't be modified unless 'force' */ + if (!force && dsl_dataset_modified_since_lastsnap(origin_head)) + return (SET_ERROR(ETXTBSY)); + + /* origin_head should have no long holds (e.g. is not mounted) */ + if (dsl_dataset_long_held(origin_head)) + return (SET_ERROR(EBUSY)); + + /* check amount of any unconsumed refreservation */ + unused_refres_delta = + (int64_t)MIN(origin_head->ds_reserved, + origin_head->ds_phys->ds_unique_bytes) - + (int64_t)MIN(origin_head->ds_reserved, + clone->ds_phys->ds_unique_bytes); + + if (unused_refres_delta > 0 && + unused_refres_delta > + dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) + return (SET_ERROR(ENOSPC)); + + /* clone can't be over the head's refquota */ + if (origin_head->ds_quota != 0 && + clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) + return (SET_ERROR(EDQUOT)); + + return (0); } -int -dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, - uint64_t reservation) +void +dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, dmu_tx_t *tx) { - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; - - dsl_prop_setarg_init_uint64(&psa, "refreservation", source, - &reservation); + dsl_pool_t *dp = dmu_tx_pool(tx); + int64_t unused_refres_delta; - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_set_reservation_check, - dsl_dataset_set_reservation_sync, ds, &psa, 0); + ASSERT(clone->ds_reserved == 0); + ASSERT(origin_head->ds_quota == 0 || + clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); - dsl_dataset_rele(ds, FTAG); - return (err); -} + dmu_buf_will_dirty(clone->ds_dbuf, tx); + dmu_buf_will_dirty(origin_head->ds_dbuf, tx); -typedef struct zfs_hold_cleanup_arg { - dsl_pool_t *dp; - uint64_t dsobj; - char htag[MAXNAMELEN]; -} zfs_hold_cleanup_arg_t; + if (clone->ds_objset != NULL) { + dmu_objset_evict(clone->ds_objset); + clone->ds_objset = NULL; + } -static void -dsl_dataset_user_release_onexit(void *arg) -{ - zfs_hold_cleanup_arg_t *ca = arg; + if (origin_head->ds_objset != NULL) { + dmu_objset_evict(origin_head->ds_objset); + origin_head->ds_objset = NULL; + } - (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, - B_TRUE); - kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); -} + unused_refres_delta = + (int64_t)MIN(origin_head->ds_reserved, + origin_head->ds_phys->ds_unique_bytes) - + (int64_t)MIN(origin_head->ds_reserved, + clone->ds_phys->ds_unique_bytes); -void -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor) -{ - zfs_hold_cleanup_arg_t *ca; - - ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); - ca->dp = ds->ds_dir->dd_pool; - ca->dsobj = ds->ds_object; - (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); - VERIFY3U(0, ==, zfs_onexit_add_cb(minor, - dsl_dataset_user_release_onexit, ca, NULL)); -} + /* + * Reset origin's unique bytes, if it exists. + */ + if (clone->ds_prev) { + dsl_dataset_t *origin = clone->ds_prev; + uint64_t comp, uncomp; -/* - * If you add new checks here, you may need to add - * additional checks to the "temporary" case in - * snapshot_check() in dmu_objset.c. - */ -static int -dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - int error = 0; + dmu_buf_will_dirty(origin->ds_dbuf, tx); + dsl_deadlist_space_range(&clone->ds_deadlist, + origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); + } - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); + /* swap blkptrs */ + { + blkptr_t tmp; + tmp = origin_head->ds_phys->ds_bp; + origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; + clone->ds_phys->ds_bp = tmp; + } - if (!dsl_dataset_is_snapshot(ds)) - return (EINVAL); + /* set dd_*_bytes */ + { + int64_t dused, dcomp, duncomp; + uint64_t cdl_used, cdl_comp, cdl_uncomp; + uint64_t odl_used, odl_comp, odl_uncomp; - /* tags must be unique */ - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj) { - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, - 8, 1, tx); - if (error == 0) - error = EEXIST; - else if (error == ENOENT) - error = 0; - } - mutex_exit(&ds->ds_lock); + ASSERT3U(clone->ds_dir->dd_phys-> + dd_used_breakdown[DD_USED_SNAP], ==, 0); - if (error == 0 && ha->temphold && - strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - error = E2BIG; + dsl_deadlist_space(&clone->ds_deadlist, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space(&origin_head->ds_deadlist, + &odl_used, &odl_comp, &odl_uncomp); - return (error); -} + dused = clone->ds_phys->ds_referenced_bytes + cdl_used - + (origin_head->ds_phys->ds_referenced_bytes + odl_used); + dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - + (origin_head->ds_phys->ds_compressed_bytes + odl_comp); + duncomp = clone->ds_phys->ds_uncompressed_bytes + + cdl_uncomp - + (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); -void -dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - uint64_t now = gethrestime_sec(); - uint64_t zapobj; + dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, + dused, dcomp, duncomp, tx); + dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, + -dused, -dcomp, -duncomp, tx); - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj == 0) { /* - * This is the first user hold for this dataset. Create - * the userrefs zap object. + * The difference in the space used by snapshots is the + * difference in snapshot space due to the head's + * deadlist (since that's the only thing that's + * changing that affects the snapused). */ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - zapobj = ds->ds_phys->ds_userrefs_obj = - zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); - } else { - zapobj = ds->ds_phys->ds_userrefs_obj; + dsl_deadlist_space_range(&clone->ds_deadlist, + origin_head->ds_dir->dd_origin_txg, UINT64_MAX, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space_range(&origin_head->ds_deadlist, + origin_head->ds_dir->dd_origin_txg, UINT64_MAX, + &odl_used, &odl_comp, &odl_uncomp); + dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, + DD_USED_HEAD, DD_USED_SNAP, tx); } - ds->ds_userrefs++; - mutex_exit(&ds->ds_lock); - VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); + /* swap ds_*_bytes */ + SWITCH64(origin_head->ds_phys->ds_referenced_bytes, + clone->ds_phys->ds_referenced_bytes); + SWITCH64(origin_head->ds_phys->ds_compressed_bytes, + clone->ds_phys->ds_compressed_bytes); + SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, + clone->ds_phys->ds_uncompressed_bytes); + SWITCH64(origin_head->ds_phys->ds_unique_bytes, + clone->ds_phys->ds_unique_bytes); - if (ha->temphold) { - VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, - htag, &now, tx)); - } + /* apply any parent delta for change in unconsumed refreservation */ + dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, + unused_refres_delta, 0, 0, tx); - spa_history_log_internal(LOG_DS_USER_HOLD, - dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, - (int)ha->temphold, ds->ds_object); -} + /* + * Swap deadlists. + */ + dsl_deadlist_close(&clone->ds_deadlist); + dsl_deadlist_close(&origin_head->ds_deadlist); + SWITCH64(origin_head->ds_phys->ds_deadlist_obj, + clone->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, + clone->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, + origin_head->ds_phys->ds_deadlist_obj); -static int -dsl_dataset_user_hold_one(const char *dsname, void *arg) -{ - struct dsl_ds_holdarg *ha = arg; - dsl_dataset_t *ds; - int error; - char *name; + dsl_scan_ds_clone_swapped(origin_head, clone, tx); - /* alloc a buffer to hold dsname@snapname plus terminating NULL */ - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = dsl_dataset_hold(name, ha->dstg, &ds); - strfree(name); - if (error == 0) { - ha->gotone = B_TRUE; - dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, - dsl_dataset_user_hold_sync, ds, ha, 0); - } else if (error == ENOENT && ha->recursive) { - error = 0; - } else { - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - } - return (error); + spa_history_log_internal_ds(clone, "clone swap", tx, + "parent=%s", origin_head->ds_dir->dd_myname); } +/* + * Given a pool name and a dataset object number in that pool, + * return the name of that dataset. + */ int -dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, - boolean_t temphold) +dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) { - struct dsl_ds_holdarg *ha; + dsl_pool_t *dp; + dsl_dataset_t *ds; int error; - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); - ha->htag = htag; - ha->temphold = temphold; - error = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, - ds, ha, 0); - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + error = dsl_pool_hold(pname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); + if (error == 0) { + dsl_dataset_name(ds, buf); + dsl_dataset_rele(ds, FTAG); + } + dsl_pool_rele(dp, FTAG); return (error); } int -dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold, int cleanup_fd) +dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, + uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) { - struct dsl_ds_holdarg *ha; - dsl_sync_task_t *dst; - spa_t *spa; - int error; - minor_t minor = 0; - - if (cleanup_fd != -1) { - /* Currently we only support cleanup-on-exit of tempholds. */ - if (!temphold) - return (EINVAL); - error = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (error) - return (error); - } + int error = 0; - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + ASSERT3S(asize, >, 0); - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); + /* + * *ref_rsrv is the portion of asize that will come from any + * unconsumed refreservation space. + */ + *ref_rsrv = 0; - error = spa_open(dsname, &spa, FTAG); - if (error) { - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - if (cleanup_fd != -1) - zfs_onexit_fd_rele(cleanup_fd); - return (error); + mutex_enter(&ds->ds_lock); + /* + * Make a space adjustment for reserved bytes. + */ + if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { + ASSERT3U(*used, >=, + ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *ref_rsrv = + asize - MIN(asize, parent_delta(ds, asize + inflight)); } - ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - ha->htag = htag; - ha->snapname = snapname; - ha->recursive = recursive; - ha->temphold = temphold; - - if (recursive) { - error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, - ha, DS_FIND_CHILDREN); - } else { - error = dsl_dataset_user_hold_one(dsname, ha); + if (!check_quota || ds->ds_quota == 0) { + mutex_exit(&ds->ds_lock); + return (0); } - if (error == 0) - error = dsl_sync_task_group_wait(ha->dstg); - - for (dst = list_head(&ha->dstg->dstg_tasks); dst; - dst = list_next(&ha->dstg->dstg_tasks, dst)) { - dsl_dataset_t *ds = dst->dst_arg1; - - if (dst->dst_err) { - dsl_dataset_name(ds, ha->failed); - *strchr(ha->failed, '@') = '\0'; - } else if (error == 0 && minor != 0 && temphold) { - /* - * If this hold is to be released upon process exit, - * register that action now. - */ - dsl_register_onexit_hold_cleanup(ds, htag, minor); - } - dsl_dataset_rele(ds, ha->dstg); + /* + * If they are requesting more space, and our current estimate + * is over quota, they get to try again unless the actual + * on-disk is over quota and there are no pending changes (which + * may free up space for us). + */ + if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { + if (inflight > 0 || + ds->ds_phys->ds_referenced_bytes < ds->ds_quota) + error = SET_ERROR(ERESTART); + else + error = SET_ERROR(EDQUOT); } + mutex_exit(&ds->ds_lock); - if (error == 0 && recursive && !ha->gotone) - error = ENOENT; - - if (error) - (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); - - dsl_sync_task_group_destroy(ha->dstg); - - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - spa_close(spa, FTAG); - if (cleanup_fd != -1) - zfs_onexit_fd_rele(cleanup_fd); return (error); } -struct dsl_ds_releasearg { - dsl_dataset_t *ds; - const char *htag; - boolean_t own; /* do we own or just hold ds? */ -}; +typedef struct dsl_dataset_set_qr_arg { + const char *ddsqra_name; + zprop_source_t ddsqra_source; + uint64_t ddsqra_value; +} dsl_dataset_set_qr_arg_t; + +/* ARGSUSED */ static int -dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, - boolean_t *might_destroy) +dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) { - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t zapobj; - uint64_t tmp; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; int error; + uint64_t newval; - *might_destroy = B_FALSE; + if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) + return (SET_ERROR(ENOTSUP)); - mutex_enter(&ds->ds_lock); - zapobj = ds->ds_phys->ds_userrefs_obj; - if (zapobj == 0) { - /* The tag can't possibly exist */ - mutex_exit(&ds->ds_lock); - return (ESRCH); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); } - /* Make sure the tag exists */ - error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); - if (error) { - mutex_exit(&ds->ds_lock); - if (error == ENOENT) - error = ESRCH; + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); return (error); } - if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)) - *might_destroy = B_TRUE; + if (newval == 0) { + dsl_dataset_rele(ds, FTAG); + return (0); + } - mutex_exit(&ds->ds_lock); + if (newval < ds->ds_phys->ds_referenced_bytes || + newval < ds->ds_reserved) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENOSPC)); + } + + dsl_dataset_rele(ds, FTAG); return (0); } -static int -dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) +static void +dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) { - struct dsl_ds_releasearg *ra = arg1; - dsl_dataset_t *ds = ra->ds; - boolean_t might_destroy; - int error; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + uint64_t newval; - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); - error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); - if (error) - return (error); + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - if (might_destroy) { - struct dsl_ds_destroyarg dsda = {0}; + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); - if (dmu_tx_is_syncing(tx)) { - /* - * If we're not prepared to remove the snapshot, - * we can't allow the release to happen right now. - */ - if (!ra->own) - return (EBUSY); - } - dsda.ds = ds; - dsda.releasing = B_TRUE; - return (dsl_dataset_destroy_check(&dsda, tag, tx)); + if (ds->ds_quota != newval) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_quota = newval; } - - return (0); + dsl_dataset_rele(ds, FTAG); } -static void -dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) +int +dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, + uint64_t refquota) { - struct dsl_ds_releasearg *ra = arg1; - dsl_dataset_t *ds = ra->ds; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - uint64_t zapobj; - uint64_t dsobj = ds->ds_object; - uint64_t refs; - int error; + dsl_dataset_set_qr_arg_t ddsqra; - mutex_enter(&ds->ds_lock); - ds->ds_userrefs--; - refs = ds->ds_userrefs; - mutex_exit(&ds->ds_lock); - error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); - VERIFY(error == 0 || error == ENOENT); - zapobj = ds->ds_phys->ds_userrefs_obj; - VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); - - spa_history_log_internal(LOG_DS_USER_RELEASE, - dp->dp_spa, tx, "<%s> %lld dataset = %llu", - ra->htag, (longlong_t)refs, dsobj); - - if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)) { - struct dsl_ds_destroyarg dsda = {0}; - - ASSERT(ra->own); - dsda.ds = ds; - dsda.releasing = B_TRUE; - /* We already did the destroy_check */ - dsl_dataset_destroy_sync(&dsda, tag, tx); - } + ddsqra.ddsqra_name = dsname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = refquota; + + return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, + dsl_dataset_set_refquota_sync, &ddsqra, 0)); } static int -dsl_dataset_user_release_one(const char *dsname, void *arg) +dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) { - struct dsl_ds_holdarg *ha = arg; - struct dsl_ds_releasearg *ra; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; - void *dtag = ha->dstg; - char *name; - boolean_t own = B_FALSE; - boolean_t might_destroy; - - /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = dsl_dataset_hold(name, dtag, &ds); - strfree(name); - if (error == ENOENT && ha->recursive) - return (0); - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - if (error) - return (error); - - ha->gotone = B_TRUE; + uint64_t newval, unique; - ASSERT(dsl_dataset_is_snapshot(ds)); + if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) + return (SET_ERROR(ENOTSUP)); - error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); - if (error) { - dsl_dataset_rele(ds, dtag); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) return (error); - } - if (might_destroy) { -#ifdef _KERNEL - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = zfs_unmount_snap(name, NULL); - strfree(name); - if (error) { - dsl_dataset_rele(ds, dtag); - return (error); - } -#endif - if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { - dsl_dataset_rele(ds, dtag); - return (EBUSY); - } else { - own = B_TRUE; - dsl_dataset_make_exclusive(ds, dtag); - } + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); } - ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); - ra->ds = ds; - ra->htag = ha->htag; - ra->own = own; - dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, - dsl_dataset_user_release_sync, ra, dtag, 0); - - return (0); -} - -int -dsl_dataset_user_release(char *dsname, char *snapname, char *htag, - boolean_t recursive) -{ - struct dsl_ds_holdarg *ha; - dsl_sync_task_t *dst; - spa_t *spa; - int error; - -top: - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); - - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - - error = spa_open(dsname, &spa, FTAG); - if (error) { - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); return (error); } - ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - ha->htag = htag; - ha->snapname = snapname; - ha->recursive = recursive; - if (recursive) { - error = dmu_objset_find(dsname, dsl_dataset_user_release_one, - ha, DS_FIND_CHILDREN); - } else { - error = dsl_dataset_user_release_one(dsname, ha); + /* + * If we are doing the preliminary check in open context, the + * space estimates may be inaccurate. + */ + if (!dmu_tx_is_syncing(tx)) { + dsl_dataset_rele(ds, FTAG); + return (0); } - if (error == 0) - error = dsl_sync_task_group_wait(ha->dstg); - - for (dst = list_head(&ha->dstg->dstg_tasks); dst; - dst = list_next(&ha->dstg->dstg_tasks, dst)) { - struct dsl_ds_releasearg *ra = dst->dst_arg1; - dsl_dataset_t *ds = ra->ds; - if (dst->dst_err) - dsl_dataset_name(ds, ha->failed); + mutex_enter(&ds->ds_lock); + if (!DS_UNIQUE_IS_ACCURATE(ds)) + dsl_dataset_recalc_head_uniq(ds); + unique = ds->ds_phys->ds_unique_bytes; + mutex_exit(&ds->ds_lock); - if (ra->own) - dsl_dataset_disown(ds, ha->dstg); - else - dsl_dataset_rele(ds, ha->dstg); + if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { + uint64_t delta = MAX(unique, newval) - + MAX(unique, ds->ds_reserved); - kmem_free(ra, sizeof (struct dsl_ds_releasearg)); + if (delta > + dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || + (ds->ds_quota > 0 && newval > ds->ds_quota)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(ENOSPC)); + } } - if (error == 0 && recursive && !ha->gotone) - error = ENOENT; - - if (error && error != EBUSY) - (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); - - dsl_sync_task_group_destroy(ha->dstg); - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - spa_close(spa, FTAG); - - /* - * We can get EBUSY if we were racing with deferred destroy and - * dsl_dataset_user_release_check() hadn't done the necessary - * open context setup. We can also get EBUSY if we're racing - * with destroy and that thread is the ds_owner. Either way - * the busy condition should be transient, and we should retry - * the release operation. - */ - if (error == EBUSY) - goto top; - - return (error); + dsl_dataset_rele(ds, FTAG); + return (0); } -/* - * Called at spa_load time (with retry == B_FALSE) to release a stale - * temporary user hold. Also called by the onexit code (with retry == B_TRUE). - */ -int -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, - boolean_t retry) +void +dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, + zprop_source_t source, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds; - char *snap; - char *name; - int namelen; - int error; + uint64_t newval; + uint64_t unique; + int64_t delta; - do { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) - return (error); - namelen = dsl_dataset_namelen(ds)+1; - name = kmem_alloc(namelen, KM_SLEEP); - dsl_dataset_name(ds, name); - dsl_dataset_rele(ds, FTAG); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + source, sizeof (value), 1, &value, tx); - snap = strchr(name, '@'); - *snap = '\0'; - ++snap; - error = dsl_dataset_user_release(name, snap, htag, B_FALSE); - kmem_free(name, namelen); + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); - /* - * The object can't have been destroyed because we have a hold, - * but it might have been renamed, resulting in ENOENT. Retry - * if we've been requested to do so. - * - * It would be nice if we could use the dsobj all the way - * through and avoid ENOENT entirely. But we might need to - * unmount the snapshot, and there's currently no way to lookup - * a vfsp using a ZFS object id. - */ - } while ((error == ENOENT) && retry); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + mutex_enter(&ds->ds_dir->dd_lock); + mutex_enter(&ds->ds_lock); + ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); + unique = ds->ds_phys->ds_unique_bytes; + delta = MAX(0, (int64_t)(newval - unique)) - + MAX(0, (int64_t)(ds->ds_reserved - unique)); + ds->ds_reserved = newval; + mutex_exit(&ds->ds_lock); - return (error); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); + mutex_exit(&ds->ds_dir->dd_lock); } -int -dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) +static void +dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) { + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; - int err; - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - - VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); - if (ds->ds_phys->ds_userrefs_obj != 0) { - zap_attribute_t *za; - zap_cursor_t zc; - - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); - for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_userrefs_obj); - zap_cursor_retrieve(&zc, za) == 0; - zap_cursor_advance(&zc)) { - VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, - za->za_first_integer)); - } - zap_cursor_fini(&zc); - kmem_free(za, sizeof (zap_attribute_t)); - } + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); + dsl_dataset_set_refreservation_sync_impl(ds, + ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); dsl_dataset_rele(ds, FTAG); - return (0); } -/* - * Note, this function is used as the callback for dmu_objset_find(). We - * always return 0 so that we will continue to find and process - * inconsistent datasets, even if we encounter an error trying to - * process one of them. - */ -/* ARGSUSED */ int -dsl_destroy_inconsistent(const char *dsname, void *arg) +dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, + uint64_t refreservation) { - dsl_dataset_t *ds; + dsl_dataset_set_qr_arg_t ddsqra; - if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { - if (DS_IS_INCONSISTENT(ds)) - (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); - else - dsl_dataset_disown(ds, FTAG); - } - return (0); + ddsqra.ddsqra_name = dsname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = refreservation; + + return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, + dsl_dataset_set_refreservation_sync, &ddsqra, 0)); } /* @@ -4235,6 +2842,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t snapobj; dsl_pool_t *dp = new->ds_dir->dd_pool; + ASSERT(dsl_pool_config_held(dp)); + *usedp = 0; *usedp += new->ds_phys->ds_referenced_bytes; *usedp -= oldsnap->ds_phys->ds_referenced_bytes; @@ -4247,7 +2856,6 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, *uncompp += new->ds_phys->ds_uncompressed_bytes; *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; - rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = new->ds_object; while (snapobj != oldsnap->ds_object) { dsl_dataset_t *snap; @@ -4291,12 +2899,11 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, if (snap != new) dsl_dataset_rele(snap, FTAG); if (snapobj == 0) { - err = EINVAL; + err = SET_ERROR(EINVAL); break; } } - rw_exit(&dp->dp_config_rwlock); return (err); } @@ -4334,11 +2941,10 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, if (firstsnap->ds_dir != lastsnap->ds_dir || firstsnap->ds_phys->ds_creation_txg > lastsnap->ds_phys->ds_creation_txg) - return (EINVAL); + return (SET_ERROR(EINVAL)); *usedp = *compp = *uncompp = 0; - rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = lastsnap->ds_phys->ds_next_snap_obj; while (snapobj != firstsnap->ds_object) { dsl_dataset_t *ds; @@ -4359,6 +2965,42 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, ASSERT3U(snapobj, !=, 0); dsl_dataset_rele(ds, FTAG); } - rw_exit(&dp->dp_config_rwlock); return (err); } + +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'. Or 'earlier' could be the origin of + * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's + * filesystem. Or 'earlier' could be the origin's origin. + */ +boolean_t +dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ + dsl_pool_t *dp = later->ds_dir->dd_pool; + int error; + boolean_t ret; + + ASSERT(dsl_pool_config_held(dp)); + + if (earlier->ds_phys->ds_creation_txg >= + later->ds_phys->ds_creation_txg) + return (B_FALSE); + + if (later->ds_dir == earlier->ds_dir) + return (B_TRUE); + if (!dsl_dir_is_clone(later->ds_dir)) + return (B_FALSE); + + if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) + return (B_TRUE); + dsl_dataset_t *origin; + error = dsl_dataset_hold_obj(dp, + later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + if (error != 0) + return (B_FALSE); + ret = dsl_dataset_is_before(origin, earlier); + dsl_dataset_rele(origin, FTAG); + return (ret); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c index 8f9d2c5f8..623eaab85 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -107,7 +107,7 @@ dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) const char *perm = nvpair_name(permpair); if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0) - return (EPERM); + return (SET_ERROR(EPERM)); if ((error = dsl_deleg_access(ddname, perm, cr)) != 0) return (error); @@ -139,36 +139,45 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) if (type != ZFS_DELEG_USER && type != ZFS_DELEG_USER_SETS) - return (EPERM); + return (SET_ERROR(EPERM)); if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0) - return (EPERM); + return (SET_ERROR(EPERM)); } return (0); } +typedef struct dsl_deleg_arg { + const char *dda_name; + nvlist_t *dda_nvlist; +} dsl_deleg_arg_t; + static void -dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_deleg_set_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - nvlist_t *nvp = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_deleg_arg_t *dda = arg; + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; nvpair_t *whopair = NULL; - uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t zapobj; + + VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); + zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } - while (whopair = nvlist_next_nvpair(nvp, whopair)) { + while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; uint64_t jumpobj; - VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + perms = fnvpair_value_nvlist(whopair); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS, @@ -181,27 +190,31 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); - spa_history_log_internal(LOG_DS_PERM_UPDATE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission update", tx, + "%s %s", whokey, perm); } } + dsl_dir_rele(dd, FTAG); } static void -dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - nvlist_t *nvp = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_deleg_arg_t *dda = arg; + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; nvpair_t *whopair = NULL; - uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t zapobj; - if (zapobj == 0) + VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); + zapobj = dd->dd_phys->dd_deleg_zapobj; + if (zapobj == 0) { + dsl_dir_rele(dd, FTAG); return; + } - while (whopair = nvlist_next_nvpair(nvp, whopair)) { + while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; @@ -213,10 +226,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s dataset = %llu", whokey, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission who remove", + tx, "%s", whokey); continue; } @@ -234,41 +245,44 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission remove", tx, + "%s %s", whokey, perm); } } + dsl_dir_rele(dd, FTAG); } -int -dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) +static int +dsl_deleg_check(void *arg, dmu_tx_t *tx) { + dsl_deleg_arg_t *dda = arg; dsl_dir_t *dd; int error; - nvpair_t *whopair = NULL; - int blocks_modified = 0; - error = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (error) - return (error); - - if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < + if (spa_version(dmu_tx_pool(tx)->dp_spa) < SPA_VERSION_DELEGATED_PERMS) { - dsl_dir_close(dd, FTAG); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } - while (whopair = nvlist_next_nvpair(nvp, whopair)) - blocks_modified++; + error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL); + if (error == 0) + dsl_dir_rele(dd, FTAG); + return (error); +} - error = dsl_sync_task_do(dd->dd_pool, NULL, - unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, - dd, nvp, blocks_modified); - dsl_dir_close(dd, FTAG); +int +dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) +{ + dsl_deleg_arg_t dda; - return (error); + /* nvp must already have been verified to be valid */ + + dda.dda_name = ddname; + dda.dda_nvlist = nvp; + + return (dsl_sync_task(ddname, dsl_deleg_check, + unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, + &dda, fnvlist_num_pairs(nvp))); } /* @@ -296,16 +310,21 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) int error; objset_t *mos; - error = dsl_dir_open(ddname, FTAG, &startdd, NULL); - if (error) + error = dsl_pool_hold(ddname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL); + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); + } dp = startdd->dd_pool; mos = dp->dp_meta_objset; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); - rw_enter(&dp->dp_config_rwlock, RW_READER); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { zap_cursor_t basezc; zap_attribute_t baseza; @@ -313,15 +332,12 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) uint64_t n; char source[MAXNAMELEN]; - if (dd->dd_phys->dd_deleg_zapobj && - (zap_count(mos, dd->dd_phys->dd_deleg_zapobj, - &n) == 0) && n) { - VERIFY(nvlist_alloc(&sp_nvp, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - } else { + if (dd->dd_phys->dd_deleg_zapobj == 0 || + zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 || + n == 0) continue; - } + sp_nvp = fnvlist_alloc(); for (zap_cursor_init(&basezc, mos, dd->dd_phys->dd_deleg_zapobj); zap_cursor_retrieve(&basezc, &baseza) == 0; @@ -333,29 +349,26 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) ASSERT(baseza.za_integer_length == 8); ASSERT(baseza.za_num_integers == 1); - VERIFY(nvlist_alloc(&perms_nvp, - NV_UNIQUE_NAME, KM_SLEEP) == 0); + perms_nvp = fnvlist_alloc(); for (zap_cursor_init(&zc, mos, baseza.za_first_integer); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { - VERIFY(nvlist_add_boolean(perms_nvp, - za.za_name) == 0); + fnvlist_add_boolean(perms_nvp, za.za_name); } zap_cursor_fini(&zc); - VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name, - perms_nvp) == 0); - nvlist_free(perms_nvp); + fnvlist_add_nvlist(sp_nvp, baseza.za_name, perms_nvp); + fnvlist_free(perms_nvp); } zap_cursor_fini(&basezc); dsl_dir_name(dd, source); - VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0); + fnvlist_add_nvlist(*nvp, source, sp_nvp); nvlist_free(sp_nvp); } - rw_exit(&dp->dp_config_rwlock); - dsl_dir_close(startdd, FTAG); + dsl_dir_rele(startdd, FTAG); + dsl_pool_rele(dp, FTAG); return (0); } @@ -404,7 +417,7 @@ dsl_check_access(objset_t *mos, uint64_t zapobj, if (error == 0) { error = zap_lookup(mos, jumpobj, perm, 8, 1, &zero); if (error == ENOENT) - error = EPERM; + error = SET_ERROR(EPERM); } return (error); } @@ -449,7 +462,7 @@ dsl_check_user_access(objset_t *mos, uint64_t zapobj, const char *perm, return (0); } - return (EPERM); + return (SET_ERROR(EPERM)); } /* @@ -524,12 +537,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, } /* - * Check if user has requested permission. If descendent is set, must have - * descendent perms. + * Check if user has requested permission. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, - cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; @@ -544,13 +555,13 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, mos = dp->dp_meta_objset; if (dsl_delegation_on(mos) == B_FALSE) - return (ECANCELED); + return (SET_ERROR(ECANCELED)); if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) - return (EPERM); + return (SET_ERROR(EPERM)); - if (dsl_dataset_is_snapshot(ds) || descendent) { + if (dsl_dataset_is_snapshot(ds)) { /* * Snapshots are treated as descendents only, * local permissions do not apply. @@ -563,7 +574,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, avl_create(&permsets, perm_set_compare, sizeof (perm_set_t), offsetof(perm_set_t, p_node)); - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent, checkflag = ZFS_DELEG_DESCENDENT) { uint64_t zapobj; @@ -622,9 +633,8 @@ again: if (error == 0) goto success; } - error = EPERM; + error = SET_ERROR(EPERM); success: - rw_exit(&dp->dp_config_rwlock); cookie = NULL; while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) @@ -636,15 +646,19 @@ success: int dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) { + dsl_pool_t *dp; dsl_dataset_t *ds; int error; - error = dsl_dataset_hold(dsname, FTAG, &ds); - if (error) + error = dsl_pool_hold(dsname, FTAG, &dp); + if (error != 0) return (error); - - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); - dsl_dataset_rele(ds, FTAG); + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == 0) { + error = dsl_deleg_access_impl(ds, perm, cr); + dsl_dataset_rele(ds, FTAG); + } + dsl_pool_rele(dp, FTAG); return (error); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c new file mode 100644 index 000000000..d465e7183 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c @@ -0,0 +1,926 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct dmu_snapshots_destroy_arg { + nvlist_t *dsda_snaps; + nvlist_t *dsda_successful_snaps; + boolean_t dsda_defer; + nvlist_t *dsda_errlist; +} dmu_snapshots_destroy_arg_t; + +/* + * ds must be owned. + */ +static int +dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) +{ + if (!dsl_dataset_is_snapshot(ds)) + return (SET_ERROR(EINVAL)); + + if (dsl_dataset_long_held(ds)) + return (SET_ERROR(EBUSY)); + + /* + * Only allow deferred destroy on pools that support it. + * NOTE: deferred destroy is only supported on snapshots. + */ + if (defer) { + if (spa_version(ds->ds_dir->dd_pool->dp_spa) < + SPA_VERSION_USERREFS) + return (SET_ERROR(ENOTSUP)); + return (0); + } + + /* + * If this snapshot has an elevated user reference count, + * we can't destroy it yet. + */ + if (ds->ds_userrefs > 0) + return (SET_ERROR(EBUSY)); + + /* + * Can't delete a branch point. + */ + if (ds->ds_phys->ds_num_children > 1) + return (SET_ERROR(EEXIST)); + + return (0); +} + +static int +dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) +{ + dmu_snapshots_destroy_arg_t *dsda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int error = 0; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) { + dsl_dataset_t *ds; + + error = dsl_dataset_hold(dp, nvpair_name(pair), + FTAG, &ds); + + /* + * If the snapshot does not exist, silently ignore it + * (it's "already destroyed"). + */ + if (error == ENOENT) + continue; + + if (error == 0) { + error = dsl_destroy_snapshot_check_impl(ds, + dsda->dsda_defer); + dsl_dataset_rele(ds, FTAG); + } + + if (error == 0) { + fnvlist_add_boolean(dsda->dsda_successful_snaps, + nvpair_name(pair)); + } else { + fnvlist_add_int32(dsda->dsda_errlist, + nvpair_name(pair), error); + } + } + + pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); + if (pair != NULL) + return (fnvpair_value_int32(pair)); + return (0); +} + +struct process_old_arg { + dsl_dataset_t *ds; + dsl_dataset_t *ds_prev; + boolean_t after_branch_point; + zio_t *pio; + uint64_t used, comp, uncomp; +}; + +static int +process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + struct process_old_arg *poa = arg; + dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; + + if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { + dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); + if (poa->ds_prev && !poa->after_branch_point && + bp->blk_birth > + poa->ds_prev->ds_phys->ds_prev_snap_txg) { + poa->ds_prev->ds_phys->ds_unique_bytes += + bp_get_dsize_sync(dp->dp_spa, bp); + } + } else { + poa->used += bp_get_dsize_sync(dp->dp_spa, bp); + poa->comp += BP_GET_PSIZE(bp); + poa->uncomp += BP_GET_UCSIZE(bp); + dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); + } + return (0); +} + +static void +process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, + dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) +{ + struct process_old_arg poa = { 0 }; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t deadlist_obj; + + ASSERT(ds->ds_deadlist.dl_oldfmt); + ASSERT(ds_next->ds_deadlist.dl_oldfmt); + + poa.ds = ds; + poa.ds_prev = ds_prev; + poa.after_branch_point = after_branch_point; + poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, + process_old_cb, &poa, tx)); + VERIFY0(zio_wait(poa.pio)); + ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); + + /* change snapused */ + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -poa.used, -poa.comp, -poa.uncomp, tx); + + /* swap next's deadlist to our deadlist */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_close(&ds_next->ds_deadlist); + deadlist_obj = ds->ds_phys->ds_deadlist_obj; + ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj; + ds_next->ds_phys->ds_deadlist_obj = deadlist_obj; + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds_next->ds_deadlist, mos, + ds_next->ds_phys->ds_deadlist_obj); +} + +static void +dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + + /* + * If it is the old version, dd_clones doesn't exist so we can't + * find the clones, but dsl_deadlist_remove_key() is a no-op so it + * doesn't matter. + */ + if (ds->ds_dir->dd_phys->dd_clones == 0) + return; + + for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + + VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); + if (clone->ds_dir->dd_origin_txg > mintxg) { + dsl_deadlist_remove_key(&clone->ds_deadlist, + mintxg, tx); + dsl_dataset_remove_clones_key(clone, mintxg, tx); + } + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); +} + +void +dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) +{ + int err; + int after_branch_point = FALSE; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds_prev = NULL; + uint64_t obj; + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + ASSERT(refcount_is_zero(&ds->ds_longholds)); + + if (defer && + (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) { + ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); + return; + } + + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + + dsl_scan_ds_destroyed(ds, tx); + + obj = ds->ds_object; + + if (ds->ds_phys->ds_prev_snap_obj != 0) { + ASSERT3P(ds->ds_prev, ==, NULL); + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); + after_branch_point = + (ds_prev->ds_phys->ds_next_snap_obj != obj); + + dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); + if (after_branch_point && + ds_prev->ds_phys->ds_next_clones_obj != 0) { + dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); + if (ds->ds_phys->ds_next_snap_obj != 0) { + VERIFY0(zap_add_int(mos, + ds_prev->ds_phys->ds_next_clones_obj, + ds->ds_phys->ds_next_snap_obj, tx)); + } + } + if (!after_branch_point) { + ds_prev->ds_phys->ds_next_snap_obj = + ds->ds_phys->ds_next_snap_obj; + } + } + + dsl_dataset_t *ds_next; + uint64_t old_unique; + uint64_t used = 0, comp = 0, uncomp = 0; + + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); + ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); + + old_unique = ds_next->ds_phys->ds_unique_bytes; + + dmu_buf_will_dirty(ds_next->ds_dbuf, tx); + ds_next->ds_phys->ds_prev_snap_obj = + ds->ds_phys->ds_prev_snap_obj; + ds_next->ds_phys->ds_prev_snap_txg = + ds->ds_phys->ds_prev_snap_txg; + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, + ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); + + if (ds_next->ds_deadlist.dl_oldfmt) { + process_old_deadlist(ds, ds_prev, ds_next, + after_branch_point, tx); + } else { + /* Adjust prev's unique space. */ + if (ds_prev && !after_branch_point) { + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds_prev->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_prev_snap_txg, + &used, &comp, &uncomp); + ds_prev->ds_phys->ds_unique_bytes += used; + } + + /* Adjust snapused. */ + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -used, -comp, -uncomp, tx); + + /* Move blocks to be freed to pool's free list. */ + dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, + &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, + tx); + dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, + DD_USED_HEAD, used, comp, uncomp, tx); + + /* Merge our deadlist into next's and free it. */ + dsl_deadlist_merge(&ds_next->ds_deadlist, + ds->ds_phys->ds_deadlist_obj, tx); + } + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_deadlist_obj = 0; + + /* Collapse range in clone heads */ + dsl_dataset_remove_clones_key(ds, + ds->ds_phys->ds_creation_txg, tx); + + if (dsl_dataset_is_snapshot(ds_next)) { + dsl_dataset_t *ds_nextnext; + + /* + * Update next's unique to include blocks which + * were previously shared by only this snapshot + * and it. Those blocks will be born after the + * prev snap and before this snap, and will have + * died after the next snap and before the one + * after that (ie. be on the snap after next's + * deadlist). + */ + VERIFY0(dsl_dataset_hold_obj(dp, + ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); + dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + ds_next->ds_phys->ds_unique_bytes += used; + dsl_dataset_rele(ds_nextnext, FTAG); + ASSERT3P(ds_next->ds_prev, ==, NULL); + + /* Collapse range in this head. */ + dsl_dataset_t *hds; + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + dsl_deadlist_remove_key(&hds->ds_deadlist, + ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_rele(hds, FTAG); + + } else { + ASSERT3P(ds_next->ds_prev, ==, ds); + dsl_dataset_rele(ds_next->ds_prev, ds_next); + ds_next->ds_prev = NULL; + if (ds_prev) { + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, + ds_next, &ds_next->ds_prev)); + } + + dsl_dataset_recalc_head_uniq(ds_next); + + /* + * Reduce the amount of our unconsumed refreservation + * being charged to our parent by the amount of + * new unique data we have gained. + */ + if (old_unique < ds_next->ds_reserved) { + int64_t mrsdelta; + uint64_t new_unique = + ds_next->ds_phys->ds_unique_bytes; + + ASSERT(old_unique <= new_unique); + mrsdelta = MIN(new_unique - old_unique, + ds_next->ds_reserved - old_unique); + dsl_dir_diduse_space(ds->ds_dir, + DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); + } + } + dsl_dataset_rele(ds_next, FTAG); + + /* + * This must be done after the dsl_traverse(), because it will + * re-open the objset. + */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + + /* remove from snapshot namespace */ + dsl_dataset_t *ds_head; + ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); + VERIFY0(dsl_dataset_get_snapname(ds)); +#ifdef ZFS_DEBUG + { + uint64_t val; + + err = dsl_dataset_snap_lookup(ds_head, + ds->ds_snapname, &val); + ASSERT0(err); + ASSERT3U(val, ==, obj); + } +#endif + VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx)); + dsl_dataset_rele(ds_head, FTAG); + + if (ds_prev != NULL) + dsl_dataset_rele(ds_prev, FTAG); + + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + + if (ds->ds_phys->ds_next_clones_obj != 0) { + uint64_t count; + ASSERT0(zap_count(mos, + ds->ds_phys->ds_next_clones_obj, &count) && count == 0); + VERIFY0(dmu_object_free(mos, + ds->ds_phys->ds_next_clones_obj, tx)); + } + if (ds->ds_phys->ds_props_obj != 0) + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); + if (ds->ds_phys->ds_userrefs_obj != 0) + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); + dsl_dir_rele(ds->ds_dir, ds); + ds->ds_dir = NULL; + VERIFY0(dmu_object_free(mos, obj, tx)); +} + +static void +dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) +{ + dmu_snapshots_destroy_arg_t *dsda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL); + pair != NULL; + pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) { + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + + dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx); + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * The semantics of this function are described in the comment above + * lzc_destroy_snaps(). To summarize: + * + * The snapshots must all be in the same pool. + * + * Snapshots that don't exist will be silently ignored (considered to be + * "already deleted"). + * + * On success, all snaps will be destroyed and this will return 0. + * On failure, no snaps will be destroyed, the errlist will be filled in, + * and this will return an errno. + */ +int +dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, + nvlist_t *errlist) +{ + dmu_snapshots_destroy_arg_t dsda; + int error; + nvpair_t *pair; + + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + + dsda.dsda_snaps = snaps; + dsda.dsda_successful_snaps = fnvlist_alloc(); + dsda.dsda_defer = defer; + dsda.dsda_errlist = errlist; + + error = dsl_sync_task(nvpair_name(pair), + dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync, + &dsda, 0); + fnvlist_free(dsda.dsda_successful_snaps); + + return (error); +} + +int +dsl_destroy_snapshot(const char *name, boolean_t defer) +{ + int error; + nvlist_t *nvl = fnvlist_alloc(); + nvlist_t *errlist = fnvlist_alloc(); + + fnvlist_add_boolean(nvl, name); + error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); + fnvlist_free(errlist); + fnvlist_free(nvl); + return (error); +} + +struct killarg { + dsl_dataset_t *ds; + dmu_tx_t *tx; +}; + +/* ARGSUSED */ +static int +kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct killarg *ka = arg; + dmu_tx_t *tx = ka->tx; + + if (bp == NULL) + return (0); + + if (zb->zb_level == ZB_ZIL_LEVEL) { + ASSERT(zilog != NULL); + /* + * It's a block in the intent log. It has no + * accounting, so just free it. + */ + dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); + } else { + ASSERT(zilog == NULL); + ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); + (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); + } + + return (0); +} + +static void +old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + struct killarg ka; + + /* + * Free everything that we point to (that's born after + * the previous snapshot, if we are a clone) + * + * NB: this should be very quick, because we already + * freed all the objects in open context. + */ + ka.ds = ds; + ka.tx = tx; + VERIFY0(traverse_dataset(ds, + ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, + kill_blkptr, &ka)); + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); +} + +typedef struct dsl_destroy_head_arg { + const char *ddha_name; +} dsl_destroy_head_arg_t; + +int +dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) +{ + int error; + uint64_t count; + objset_t *mos; + + if (dsl_dataset_is_snapshot(ds)) + return (SET_ERROR(EINVAL)); + + if (refcount_count(&ds->ds_longholds) != expected_holds) + return (SET_ERROR(EBUSY)); + + mos = ds->ds_dir->dd_pool->dp_meta_objset; + + /* + * Can't delete a head dataset if there are snapshots of it. + * (Except if the only snapshots are from the branch we cloned + * from.) + */ + if (ds->ds_prev != NULL && + ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) + return (SET_ERROR(EBUSY)); + + /* + * Can't delete if there are children of this fs. + */ + error = zap_count(mos, + ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); + if (error != 0) + return (error); + if (count != 0) + return (SET_ERROR(EEXIST)); + + if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && + ds->ds_prev->ds_phys->ds_num_children == 2 && + ds->ds_prev->ds_userrefs == 0) { + /* We need to remove the origin snapshot as well. */ + if (!refcount_is_zero(&ds->ds_prev->ds_longholds)) + return (SET_ERROR(EBUSY)); + } + return (0); +} + +static int +dsl_destroy_head_check(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + + error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); + if (error != 0) + return (error); + + error = dsl_destroy_head_check_impl(ds, 0); + dsl_dataset_rele(ds, FTAG); + return (error); +} + +static void +dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) +{ + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + dd_used_t t; + + ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); + + VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); + + ASSERT0(dd->dd_phys->dd_head_dataset_obj); + + /* + * Remove our reservation. The impl() routine avoids setting the + * actual property, which would require the (already destroyed) ds. + */ + dsl_dir_set_reservation_sync_impl(dd, 0, tx); + + ASSERT0(dd->dd_phys->dd_used_bytes); + ASSERT0(dd->dd_phys->dd_reserved); + for (t = 0; t < DD_USED_NUM; t++) + ASSERT0(dd->dd_phys->dd_used_breakdown[t]); + + VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); + VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); + VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); + VERIFY0(zap_remove(mos, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); + + dsl_dir_rele(dd, FTAG); + VERIFY0(dmu_object_free(mos, ddobj, tx)); +} + +void +dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + uint64_t obj, ddobj, prevobj = 0; + boolean_t rmorigin; + + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT(ds->ds_prev == NULL || + ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + + rmorigin = (dsl_dir_is_clone(ds->ds_dir) && + DS_IS_DEFER_DESTROY(ds->ds_prev) && + ds->ds_prev->ds_phys->ds_num_children == 2 && + ds->ds_prev->ds_userrefs == 0); + + /* Remove our reservation */ + if (ds->ds_reserved != 0) { + dsl_dataset_set_refreservation_sync_impl(ds, + (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), + 0, tx); + ASSERT0(ds->ds_reserved); + } + + dsl_scan_ds_destroyed(ds, tx); + + obj = ds->ds_object; + + if (ds->ds_phys->ds_prev_snap_obj != 0) { + /* This is a clone */ + ASSERT(ds->ds_prev != NULL); + ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj); + ASSERT0(ds->ds_phys->ds_next_snap_obj); + + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) { + dsl_dataset_remove_from_next_clones(ds->ds_prev, + obj, tx); + } + + ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1); + ds->ds_prev->ds_phys->ds_num_children--; + } + + zfeature_info_t *async_destroy = + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; + objset_t *os; + + /* + * Destroy the deadlist. Unless it's a clone, the + * deadlist should be empty. (If it's a clone, it's + * safe to ignore the deadlist contents.) + */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_deadlist_obj = 0; + + VERIFY0(dmu_objset_from_ds(ds, &os)); + + if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { + old_synchronous_dataset_destroy(ds, tx); + } else { + /* + * Move the bptree into the pool's list of trees to + * clean up and update space accounting information. + */ + uint64_t used, comp, uncomp; + + zil_destroy_sync(dmu_objset_zil(os), tx); + + if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { + spa_feature_incr(dp->dp_spa, async_destroy, tx); + dp->dp_bptree_obj = bptree_alloc(mos, tx); + VERIFY0(zap_add(mos, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, + &dp->dp_bptree_obj, tx)); + } + + used = ds->ds_dir->dd_phys->dd_used_bytes; + comp = ds->ds_dir->dd_phys->dd_compressed_bytes; + uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; + + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || + ds->ds_phys->ds_unique_bytes == used); + + bptree_add(mos, dp->dp_bptree_obj, + &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, + used, comp, uncomp, tx); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, + -used, -comp, -uncomp, tx); + dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, + used, comp, uncomp, tx); + } + + if (ds->ds_prev != NULL) { + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY0(zap_remove_int(mos, + ds->ds_prev->ds_dir->dd_phys->dd_clones, + ds->ds_object, tx)); + } + prevobj = ds->ds_prev->ds_object; + dsl_dataset_rele(ds->ds_prev, ds); + ds->ds_prev = NULL; + } + + /* + * This must be done after the dsl_traverse(), because it will + * re-open the objset. + */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + + /* Erase the link in the dir */ + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; + ddobj = ds->ds_dir->dd_object; + ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx)); + + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + + ASSERT0(ds->ds_phys->ds_next_clones_obj); + ASSERT0(ds->ds_phys->ds_props_obj); + ASSERT0(ds->ds_phys->ds_userrefs_obj); + dsl_dir_rele(ds->ds_dir, ds); + ds->ds_dir = NULL; + VERIFY0(dmu_object_free(mos, obj, tx)); + + dsl_dir_destroy_sync(ddobj, tx); + + if (rmorigin) { + dsl_dataset_t *prev; + VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); + dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); + dsl_dataset_rele(prev, FTAG); + } +} + +static void +dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); + dsl_destroy_head_sync_impl(ds, tx); + dsl_dataset_rele(ds, FTAG); +} + +static void +dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); + + /* Mark it as inconsistent on-disk, in case we crash */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + + spa_history_log_internal_ds(ds, "destroy begin", tx, ""); + dsl_dataset_rele(ds, FTAG); +} + +int +dsl_destroy_head(const char *name) +{ + dsl_destroy_head_arg_t ddha; + int error; + spa_t *spa; + boolean_t isenabled; + +#ifdef _KERNEL + zfs_destroy_unmount_origin(name); +#endif + + error = spa_open(name, &spa, FTAG); + if (error != 0) + return (error); + isenabled = spa_feature_is_enabled(spa, + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]); + spa_close(spa, FTAG); + + ddha.ddha_name = name; + + if (!isenabled) { + objset_t *os; + + error = dsl_sync_task(name, dsl_destroy_head_check, + dsl_destroy_head_begin_sync, &ddha, 0); + if (error != 0) + return (error); + + /* + * Head deletion is processed in one txg on old pools; + * remove the objects from open context so that the txg sync + * is not too long. + */ + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); + if (error == 0) { + uint64_t prev_snap_txg = + dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg; + for (uint64_t obj = 0; error == 0; + error = dmu_object_next(os, &obj, FALSE, + prev_snap_txg)) + (void) dmu_free_object(os, obj); + /* sync out all frees */ + txg_wait_synced(dmu_objset_pool(os), 0); + dmu_objset_disown(os, FTAG); + } + } + + return (dsl_sync_task(name, dsl_destroy_head_check, + dsl_destroy_head_sync, &ddha, 0)); +} + +/* + * Note, this function is used as the callback for dmu_objset_find(). We + * always return 0 so that we will continue to find and process + * inconsistent datasets, even if we encounter an error trying to + * process one of them. + */ +/* ARGSUSED */ +int +dsl_destroy_inconsistent(const char *dsname, void *arg) +{ + objset_t *os; + + if (dmu_objset_hold(dsname, FTAG, &os) == 0) { + boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + dmu_objset_rele(os, FTAG); + if (inconsistent) + (void) dsl_destroy_head(dsname); + } + return (0); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c index 79e5121ac..91351df17 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -45,8 +46,6 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); - /* ARGSUSED */ static void @@ -63,7 +62,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) } if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); spa_close(dd->dd_pool->dp_spa, dd); @@ -77,18 +76,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) } int -dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, +dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **ddp) { dmu_buf_t *dbuf; dsl_dir_t *dd; int err; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); - if (err) + if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); #ifdef ZFS_DEBUG @@ -115,9 +113,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_snap_cmtime_update(dd); if (dd->dd_phys->dd_parent_obj) { - err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, + err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj, NULL, dd, &dd->dd_parent); - if (err) + if (err != 0) goto errout; if (tail) { #ifdef ZFS_DEBUG @@ -134,7 +132,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_parent->dd_phys->dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } - if (err) + if (err != 0) goto errout; } else { (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); @@ -151,7 +149,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, */ err = dmu_bonus_hold(dp->dp_meta_objset, dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); - if (err) + if (err != 0) goto errout; origin_phys = origin_bonus->db_data; dd->dd_origin_txg = @@ -163,7 +161,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_evict); if (winner) { if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; @@ -190,7 +188,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, errout: if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); @@ -198,7 +196,7 @@ errout: } void -dsl_dir_close(dsl_dir_t *dd, void *tag) +dsl_dir_rele(dsl_dir_t *dd, void *tag) { dprintf_dd(dd, "%s\n", ""); spa_close(dd->dd_pool->dp_spa, tag); @@ -255,13 +253,14 @@ static int getcomponent(const char *path, char *component, const char **nextp) { char *p; + if ((path == NULL) || (path[0] == '\0')) - return (ENOENT); + return (SET_ERROR(ENOENT)); /* This would be a good place to reserve some namespace... */ p = strpbrk(path, "/@"); if (p && (p[1] == '/' || p[1] == '@')) { /* two separators in a row */ - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (p == NULL || p == path) { /* @@ -271,16 +270,16 @@ getcomponent(const char *path, char *component, const char **nextp) */ if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (strlen(path) >= MAXNAMELEN) - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); (void) strcpy(component, path); p = NULL; } else if (p[0] == '/') { - if (p-path >= MAXNAMELEN) - return (ENAMETOOLONG); + if (p - path >= MAXNAMELEN) + return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); - component[p-path] = '\0'; + component[p - path] = '\0'; p++; } else if (p[0] == '@') { /* @@ -288,66 +287,55 @@ getcomponent(const char *path, char *component, const char **nextp) * any more slashes. */ if (strchr(path, '/')) - return (EINVAL); - if (p-path >= MAXNAMELEN) - return (ENAMETOOLONG); + return (SET_ERROR(EINVAL)); + if (p - path >= MAXNAMELEN) + return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); - component[p-path] = '\0'; + component[p - path] = '\0'; } else { - ASSERT(!"invalid p"); + panic("invalid p=%p", (void *)p); } *nextp = p; return (0); } /* - * same as dsl_open_dir, ignore the first component of name and use the - * spa instead + * Return the dsl_dir_t, and possibly the last component which couldn't + * be found in *tail. The name must be in the specified dsl_pool_t. This + * thread must hold the dp_config_rwlock for the pool. Returns NULL if the + * path is bogus, or if tail==NULL and we couldn't parse the whole name. + * (*tail)[0] == '@' means that the last component is a snapshot. */ int -dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, +dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) { char buf[MAXNAMELEN]; - const char *next, *nextnext = NULL; + const char *spaname, *next, *nextnext = NULL; int err; dsl_dir_t *dd; - dsl_pool_t *dp; uint64_t ddobj; - int openedspa = FALSE; - - dprintf("%s\n", name); err = getcomponent(name, buf, &next); - if (err) + if (err != 0) return (err); - if (spa == NULL) { - err = spa_open(buf, &spa, FTAG); - if (err) { - dprintf("spa_open(%s) failed\n", buf); - return (err); - } - openedspa = TRUE; - /* XXX this assertion belongs in spa_open */ - ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); - } + /* Make sure the name is in the specified pool. */ + spaname = spa_name(dp->dp_spa); + if (strcmp(buf, spaname) != 0) + return (SET_ERROR(EINVAL)); - dp = spa_get_dsl(spa); + ASSERT(dsl_pool_config_held(dp)); - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); - if (err) { - rw_exit(&dp->dp_config_rwlock); - if (openedspa) - spa_close(spa, FTAG); + err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); + if (err != 0) { return (err); } while (next != NULL) { dsl_dir_t *child_ds; err = getcomponent(next, buf, &nextnext); - if (err) + if (err != 0) break; ASSERT(next[0] != '\0'); if (next[0] == '@') @@ -358,25 +346,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, err = zap_lookup(dp->dp_meta_objset, dd->dd_phys->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); - if (err) { + if (err != 0) { if (err == ENOENT) err = 0; break; } - err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); - if (err) + err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds); + if (err != 0) break; - dsl_dir_close(dd, tag); + dsl_dir_rele(dd, tag); dd = child_ds; next = nextnext; } - rw_exit(&dp->dp_config_rwlock); - if (err) { - dsl_dir_close(dd, tag); - if (openedspa) - spa_close(spa, FTAG); + if (err != 0) { + dsl_dir_rele(dd, tag); return (err); } @@ -387,30 +372,16 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, if (next != NULL && (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { /* bad path name */ - dsl_dir_close(dd, tag); + dsl_dir_rele(dd, tag); dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); - err = ENOENT; + err = SET_ERROR(ENOENT); } - if (tailp) + if (tailp != NULL) *tailp = next; - if (openedspa) - spa_close(spa, FTAG); *ddp = dd; return (err); } -/* - * Return the dsl_dir_t, and possibly the last component which couldn't - * be found in *tail. Return NULL if the path is bogus, or if - * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' - * means that the last component is a snapshot. - */ -int -dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) -{ - return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); -} - uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) @@ -448,77 +419,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, return (ddobj); } -/* ARGSUSED */ -int -dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_pool_t *dp = dd->dd_pool; - objset_t *mos = dp->dp_meta_objset; - int err; - uint64_t count; - - /* - * There should be exactly two holds, both from - * dsl_dataset_destroy: one on the dd directory, and one on its - * head ds. If there are more holds, then a concurrent thread is - * performing a lookup inside this dir while we're trying to destroy - * it. To minimize this possibility, we perform this check only - * in syncing context and fail the operation if we encounter - * additional holds. The dp_config_rwlock ensures that nobody else - * opens it after we check. - */ - if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) - return (EBUSY); - - err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); - if (err) - return (err); - if (count != 0) - return (EEXIST); - - return (0); -} - -void -dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_prop_setarg_t psa; - uint64_t value = 0; - uint64_t obj; - dd_used_t t; - - ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); - ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - - /* Remove our reservation. */ - dsl_prop_setarg_init_uint64(&psa, "reservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dir_set_reservation_sync(ds, &psa, tx); - - ASSERT0(dd->dd_phys->dd_used_bytes); - ASSERT0(dd->dd_phys->dd_reserved); - for (t = 0; t < DD_USED_NUM; t++) - ASSERT0(dd->dd_phys->dd_used_breakdown[t]); - - VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); - VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); - VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); - VERIFY(0 == zap_remove(mos, - dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); - - obj = dd->dd_object; - dsl_dir_close(dd, tag); - VERIFY(0 == dmu_object_free(mos, obj, tx)); -} - boolean_t dsl_dir_is_clone(dsl_dir_t *dd) { @@ -556,18 +456,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) } mutex_exit(&dd->dd_lock); - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; - VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } - rw_exit(&dd->dd_pool->dp_config_rwlock); } void @@ -577,7 +475,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) ASSERT(dd->dd_phys); - if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { + if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(dd->dd_dbuf, dd); } @@ -785,7 +683,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, used_on_disk>>10, est_inflight>>10, quota>>10, asize>>10, retval); mutex_exit(&dd->dd_lock); - return (retval); + return (SET_ERROR(retval)); } /* We need to up our estimated delta before dropping dd_lock */ @@ -847,7 +745,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, } else { if (err == EAGAIN) { txg_delay(dd->dd_pool, tx->tx_txg, 1); - err = ERESTART; + err = SET_ERROR(ERESTART); } dsl_pool_memory_pressure(dd->dd_pool); } @@ -864,7 +762,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, FALSE, asize > usize, tr_list, tx, TRUE); } - if (err) + if (err != 0) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; @@ -1015,115 +913,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, mutex_exit(&dd->dd_lock); } +typedef struct dsl_dir_set_qr_arg { + const char *ddsqra_name; + zprop_source_t ddsqra_source; + uint64_t ddsqra_value; +} dsl_dir_set_qr_arg_t; + static int -dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - int err; - uint64_t towrite; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + uint64_t towrite, newval; - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + + error = dsl_prop_predict(ds->ds_dir, "quota", + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } - if (psa->psa_effective_value == 0) + if (newval == 0) { + dsl_dataset_rele(ds, FTAG); return (0); + } - mutex_enter(&dd->dd_lock); + mutex_enter(&ds->ds_dir->dd_lock); /* * If we are doing the preliminary check in open context, and * there are pending changes, then don't fail it, since the * pending changes could under-estimate the amount of space to be * freed up. */ - towrite = dsl_dir_space_towrite(dd); + towrite = dsl_dir_space_towrite(ds->ds_dir); if ((dmu_tx_is_syncing(tx) || towrite == 0) && - (psa->psa_effective_value < dd->dd_phys->dd_reserved || - psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { - err = ENOSPC; + (newval < ds->ds_dir->dd_phys->dd_reserved || + newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) { + error = SET_ERROR(ENOSPC); } - mutex_exit(&dd->dd_lock); - return (err); + mutex_exit(&ds->ds_dir->dd_lock); + dsl_dataset_rele(ds, FTAG); + return (error); } -extern dsl_syncfunc_t dsl_prop_set_sync; - static void -dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + uint64_t newval; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); - dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - mutex_enter(&dd->dd_lock); - dd->dd_phys->dd_quota = effective_value; - mutex_exit(&dd->dd_lock); + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); + + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + mutex_enter(&ds->ds_dir->dd_lock); + ds->ds_dir->dd_phys->dd_quota = newval; + mutex_exit(&ds->ds_dir->dd_lock); + dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) { - dsl_dir_t *dd; - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; - - dsl_prop_setarg_init_uint64(&psa, "quota", source, "a); - - err = dsl_dataset_hold(ddname, FTAG, &ds); - if (err) - return (err); + dsl_dir_set_qr_arg_t ddsqra; - err = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } - - ASSERT(ds->ds_dir == dd); - - /* - * If someone removes a file, then tries to set the quota, we want to - * make sure the file freeing takes effect. - */ - txg_wait_open(dd->dd_pool, 0); - - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, - dsl_dir_set_quota_sync, ds, &psa, 0); + ddsqra.ddsqra_name = ddname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = quota; - dsl_dir_close(dd, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); + return (dsl_sync_task(ddname, dsl_dir_set_quota_check, + dsl_dir_set_quota_sync, &ddsqra, 0)); } int -dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value; - uint64_t used, avail; - int err; - - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + dsl_dir_t *dd; + uint64_t newval, used, avail; + int error; - effective_value = psa->psa_effective_value; + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + dd = ds->ds_dir; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. */ - if (!dmu_tx_is_syncing(tx)) + if (!dmu_tx_is_syncing(tx)) { + dsl_dataset_rele(ds, FTAG); return (0); + } + + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_RESERVATION), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; @@ -1136,40 +1042,32 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; } - if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { - uint64_t delta = MAX(used, effective_value) - + if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) { + uint64_t delta = MAX(used, newval) - MAX(used, dd->dd_phys->dd_reserved); - if (delta > avail) - return (ENOSPC); - if (dd->dd_phys->dd_quota > 0 && - effective_value > dd->dd_phys->dd_quota) - return (ENOSPC); + if (delta > avail || + (dd->dd_phys->dd_quota > 0 && + newval > dd->dd_phys->dd_quota)) + error = SET_ERROR(ENOSPC); } - return (0); + dsl_dataset_rele(ds, FTAG); + return (error); } -static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +void +dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; uint64_t used; int64_t delta; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, effective_value) - - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = effective_value; + delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1179,35 +1077,39 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_exit(&dd->dd_lock); } -int -dsl_dir_set_reservation(const char *ddname, zprop_source_t source, - uint64_t reservation) +static void +dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; + uint64_t newval; - dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); - err = dsl_dataset_hold(ddname, FTAG, &ds); - if (err) - return (err); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - err = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); + + dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); + dsl_dataset_rele(ds, FTAG); +} - ASSERT(ds->ds_dir == dd); +int +dsl_dir_set_reservation(const char *ddname, zprop_source_t source, + uint64_t reservation) +{ + dsl_dir_set_qr_arg_t ddsqra; - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, - dsl_dir_set_reservation_sync, ds, &psa, 0); + ddsqra.ddsqra_name = ddname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = reservation; - dsl_dir_close(dd, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); + return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, + dsl_dir_set_reservation_sync, &ddsqra, 0)); } static dsl_dir_t * @@ -1239,78 +1141,125 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) return (would_change(dd->dd_parent, delta, ancestor)); } -struct renamearg { - dsl_dir_t *newparent; - const char *mynewname; - boolean_t allowmounted; -}; +typedef struct dsl_dir_rename_arg { + const char *ddra_oldname; + const char *ddra_newname; +} dsl_dir_rename_arg_t; +/* ARGSUSED */ static int -dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - dsl_dir_t *dd = arg1; - struct renamearg *ra = arg2; - dsl_pool_t *dp = dd->dd_pool; - objset_t *mos = dp->dp_meta_objset; - int err; - uint64_t val; + int *deltap = arg; + char namebuf[MAXNAMELEN]; - /* - * There should only be one reference, from dmu_objset_rename(). - * Fleeting holds are also possible (eg, from "zfs list" getting - * stats), but any that are present in open context will likely - * be gone by syncing context, so only fail from syncing - * context. - * Don't check if we allow renaming of busy (mounted) dataset. - */ - if (!ra->allowmounted && dmu_tx_is_syncing(tx) && - dmu_buf_refcount(dd->dd_dbuf) > 1) { - return (EBUSY); + dsl_dataset_name(ds, namebuf); + + if (strlen(namebuf) + *deltap >= MAXNAMELEN) + return (SET_ERROR(ENAMETOOLONG)); + return (0); +} + +static int +dsl_dir_rename_check(void *arg, dmu_tx_t *tx) +{ + dsl_dir_rename_arg_t *ddra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd, *newparent; + const char *mynewname; + int error; + int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname); + + /* target dir should exist */ + error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); + if (error != 0) + return (error); + + /* new parent should exist */ + error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, + &newparent, &mynewname); + if (error != 0) { + dsl_dir_rele(dd, FTAG); + return (error); } - /* check for existing name */ - err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, - ra->mynewname, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); + /* can't rename to different pool */ + if (dd->dd_pool != newparent->dd_pool) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(ENXIO)); + } - if (ra->newparent != dd->dd_parent) { + /* new name should not already exist */ + if (mynewname == NULL) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(EEXIST)); + } + + /* if the name length is growing, validate child name lengths */ + if (delta > 0) { + error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, + &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (error); + } + } + + if (newparent != dd->dd_parent) { /* is there enough space? */ uint64_t myspace = MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); /* no rename into our descendant */ - if (closest_common_ancestor(dd, ra->newparent) == dd) - return (EINVAL); + if (closest_common_ancestor(dd, newparent) == dd) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(EINVAL)); + } - if (err = dsl_dir_transfer_possible(dd->dd_parent, - ra->newparent, myspace)) - return (err); + error = dsl_dir_transfer_possible(dd->dd_parent, + newparent, myspace); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (error); + } } + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); return (0); } static void -dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) { - char oldname[MAXPATHLEN], newname[MAXPATHLEN]; - dsl_dir_t *dd = arg1; - struct renamearg *ra = arg2; - dsl_pool_t *dp = dd->dd_pool; + dsl_dir_rename_arg_t *ddra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd, *newparent; + const char *mynewname; + int error; objset_t *mos = dp->dp_meta_objset; - int err; - ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2); + ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + + VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); + VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, + &mynewname)); - if (ra->newparent != dd->dd_parent) { + /* Log this before we change the name. */ + spa_history_log_internal_dd(dd, "rename", tx, + "-> %s", ddra->ddra_newname); + + if (newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, -dd->dd_phys->dd_compressed_bytes, -dd->dd_phys->dd_uncompressed_bytes, tx); - dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, + dsl_dir_diduse_space(newparent, DD_USED_CHILD, dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_compressed_bytes, dd->dd_phys->dd_uncompressed_bytes, tx); @@ -1321,7 +1270,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); - dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, + dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, unused_rsrv, 0, 0, tx); } } @@ -1329,62 +1278,43 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ - dsl_dir_name(dd, oldname); - err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, + error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx); - ASSERT0(err); + ASSERT0(error); - (void) strcpy(dd->dd_myname, ra->mynewname); - dsl_dir_close(dd->dd_parent, dd); - dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; - VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, - ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); + (void) strcpy(dd->dd_myname, mynewname); + dsl_dir_rele(dd->dd_parent, dd); + dd->dd_phys->dd_parent_obj = newparent->dd_object; + VERIFY0(dsl_dir_hold_obj(dp, + newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ - err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, - dd->dd_myname, 8, 1, &dd->dd_object, tx); - ASSERT0(err); - dsl_dir_name(dd, newname); + VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj, + dd->dd_myname, 8, 1, &dd->dd_object, tx)); + +#ifdef __FreeBSD__ #ifdef _KERNEL - zfsvfs_update_fromname(oldname, newname); - zvol_rename_minors(oldname, newname); + zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname); + zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname); #endif +#endif + + dsl_prop_notify_all(dd); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, - tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); } int -dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags) +dsl_dir_rename(const char *oldname, const char *newname) { - struct renamearg ra; - int err; + dsl_dir_rename_arg_t ddra; - /* new parent should exist */ - err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); - if (err) - return (err); + ddra.ddra_oldname = oldname; + ddra.ddra_newname = newname; - /* can't rename to different pool */ - if (dd->dd_pool != ra.newparent->dd_pool) { - err = ENXIO; - goto out; - } - - /* new name should not already exist */ - if (ra.mynewname == NULL) { - err = EEXIST; - goto out; - } - - ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED); - - err = dsl_sync_task_do(dd->dd_pool, - dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); - -out: - dsl_dir_close(ra.newparent, FTAG); - return (err); + return (dsl_sync_task(oldname, + dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3)); } int @@ -1398,7 +1328,7 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) adelta = would_change(sdd, -space, ancestor); avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); if (avail < space) - return (ENOSPC); + return (SET_ERROR(ENOSPC)); return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index 73c43dab8..df016d6b4 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -43,6 +43,7 @@ #include #include #include +#include int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ @@ -94,7 +95,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) if (err) return (err); - return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); + return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * @@ -106,7 +107,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; - rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); + rrw_init(&dp->dp_config_rwlock, B_TRUE); dp->dp_write_limit = zfs_write_limit_min; txg_init(dp, txg); @@ -117,7 +118,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, - offsetof(dsl_sync_task_group_t, dstg_node)); + offsetof(dsl_sync_task_t, dst_node)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); @@ -151,14 +152,14 @@ dsl_pool_open(dsl_pool_t *dp) dsl_dataset_t *ds; uint64_t obj; - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; - err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; @@ -179,7 +180,7 @@ dsl_pool_open(dsl_pool_t *dp) &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } - dsl_dir_close(dd, dp); + dsl_dir_rele(dd, dp); if (err) goto out; } @@ -194,7 +195,7 @@ dsl_pool_open(dsl_pool_t *dp) DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } @@ -227,7 +228,7 @@ dsl_pool_open(dsl_pool_t *dp) err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: - rw_exit(&dp->dp_config_rwlock); + rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); } @@ -242,13 +243,13 @@ dsl_pool_close(dsl_pool_t *dp) * and not a hold, so just drop that here. */ if (dp->dp_origin_snap) - dsl_dataset_drop_ref(dp->dp_origin_snap, dp); + dsl_dataset_rele(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) - dsl_dir_close(dp->dp_mos_dir, dp); + dsl_dir_rele(dp->dp_mos_dir, dp); if (dp->dp_free_dir) - dsl_dir_close(dp->dp_free_dir, dp); + dsl_dir_rele(dp->dp_free_dir, dp); if (dp->dp_root_dir) - dsl_dir_close(dp->dp_root_dir, dp); + dsl_dir_rele(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); @@ -264,7 +265,7 @@ dsl_pool_close(dsl_pool_t *dp) arc_flush(dp->dp_spa); txg_fini(dp); dsl_scan_fini(dp); - rw_destroy(&dp->dp_config_rwlock); + rrw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_vnrele_taskq); if (dp->dp_blkstats) @@ -282,6 +283,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) dsl_dataset_t *ds; uint64_t obj; + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); @@ -292,30 +295,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) ASSERT0(err); /* Initialize scan structures */ - VERIFY3U(0, ==, dsl_scan_init(dp, txg)); + VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); - VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } @@ -326,7 +329,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ - VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL @@ -336,6 +339,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) dmu_tx_commit(tx); + rrw_exit(&dp->dp_config_rwlock, FTAG); + return (dp); } @@ -358,10 +363,7 @@ static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; - dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); - rw_enter(&dp->dp_config_rwlock, RW_READER); dsl_deadlist_insert(dl, bp, tx); - rw_exit(&dp->dp_config_rwlock); return (0); } @@ -383,7 +385,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) /* * We need to copy dp_space_towrite() before doing - * dsl_sync_task_group_sync(), because + * dsl_sync_task_sync(), because * dsl_dataset_snapshot_reserve_space() will increase * dp_space_towrite but not actually write anything. */ @@ -497,14 +499,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) */ DTRACE_PROBE(pool_sync__3task); if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { - dsl_sync_task_group_t *dstg; + dsl_sync_task_t *dst; /* * No more sync tasks should have been added while we * were syncing. */ ASSERT(spa_sync_pass(dp->dp_spa) == 1); - while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) - dsl_sync_task_group_sync(dstg, tx); + while (dst = txg_list_remove(&dp->dp_sync_tasks, txg)) + dsl_sync_task_sync(dst, tx); } dmu_tx_commit(tx); @@ -627,7 +629,7 @@ dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; if (reserved && reserved > write_limit) - return (ERESTART); + return (SET_ERROR(ERESTART)); } atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); @@ -679,14 +681,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) /* ARGSUSED */ static int -upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; - dsl_pool_t *dp = spa_get_dsl(spa); - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); @@ -712,7 +713,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) * The $ORIGIN can't have any data, or the accounting * will be wrong. */ - ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); + ASSERT0(prev->ds_phys->ds_bp.blk_birth); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { @@ -732,13 +733,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) if (ds->ds_phys->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); - VERIFY(0 == dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); } } - ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); - ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); + ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object); + ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object); if (prev->ds_phys->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); @@ -746,7 +747,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY(0 == zap_add_int(dp->dp_meta_objset, + VERIFY0(zap_add_int(dp->dp_meta_objset, prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); @@ -761,25 +762,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); - VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, tx, DS_FIND_CHILDREN)); } /* ARGSUSED */ static int -upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { dmu_tx_t *tx = arg; - dsl_dataset_t *ds; - dsl_pool_t *dp = spa_get_dsl(spa); objset_t *mos = dp->dp_meta_objset; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - - if (ds->ds_dir->dd_phys->dd_origin_obj) { + if (ds->ds_dir->dd_phys->dd_origin_obj != 0) { dsl_dataset_t *origin; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); if (origin->ds_dir->dd_phys->dd_clones == 0) { @@ -788,13 +785,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, - origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + VERIFY0(zap_add_int(dp->dp_meta_objset, + origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } - - dsl_dataset_rele(ds, FTAG); return (0); } @@ -805,7 +800,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) uint64_t obj; (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* @@ -815,12 +810,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) */ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); - VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, - dp->dp_meta_objset, obj)); + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); - VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN)); } @@ -832,17 +826,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); + ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ - rw_enter(&dp->dp_config_rwlock, RW_WRITER); dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); - VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx); - VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); - rw_exit(&dp->dp_config_rwlock); } taskq_t * @@ -877,7 +870,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) *htag = '\0'; ++htag; dsobj = strtonum(za.za_name, NULL); - (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); + dsl_dataset_user_release_tmp(dp, dsobj, htag); } zap_cursor_fini(&zc); } @@ -899,7 +892,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, - const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) + const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; @@ -918,13 +911,13 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, dsl_pool_user_hold_create_obj(dp, tx); zapobj = dp->dp_tmp_userrefs_obj; } else { - return (ENOENT); + return (SET_ERROR(ENOENT)); } } name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) - error = zap_add(mos, zapobj, name, 8, 1, now, tx); + error = zap_add(mos, zapobj, name, 8, 1, &now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); @@ -937,7 +930,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, - uint64_t *now, dmu_tx_t *tx) + uint64_t now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } @@ -949,6 +942,109 @@ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { - return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, + return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0, tx, B_FALSE)); } + +/* + * DSL Pool Configuration Lock + * + * The dp_config_rwlock protects against changes to DSL state (e.g. dataset + * creation / destruction / rename / property setting). It must be held for + * read to hold a dataset or dsl_dir. I.e. you must call + * dsl_pool_config_enter() or dsl_pool_hold() before calling + * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock + * must be held continuously until all datasets and dsl_dirs are released. + * + * The only exception to this rule is that if a "long hold" is placed on + * a dataset, then the dp_config_rwlock may be dropped while the dataset + * is still held. The long hold will prevent the dataset from being + * destroyed -- the destroy will fail with EBUSY. A long hold can be + * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset + * (by calling dsl_{dataset,objset}_{try}own{_obj}). + * + * Legitimate long-holders (including owners) should be long-running, cancelable + * tasks that should cause "zfs destroy" to fail. This includes DMU + * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open), + * "zfs send", and "zfs diff". There are several other long-holders whose + * uses are suboptimal (e.g. "zfs promote", and zil_suspend()). + * + * The usual formula for long-holding would be: + * dsl_pool_hold() + * dsl_dataset_hold() + * ... perform checks ... + * dsl_dataset_long_hold() + * dsl_pool_rele() + * ... perform long-running task ... + * dsl_dataset_long_rele() + * dsl_dataset_rele() + * + * Note that when the long hold is released, the dataset is still held but + * the pool is not held. The dataset may change arbitrarily during this time + * (e.g. it could be destroyed). Therefore you shouldn't do anything to the + * dataset except release it. + * + * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only + * or modifying operations. + * + * Modifying operations should generally use dsl_sync_task(). The synctask + * infrastructure enforces proper locking strategy with respect to the + * dp_config_rwlock. See the comment above dsl_sync_task() for details. + * + * Read-only operations will manually hold the pool, then the dataset, obtain + * information from the dataset, then release the pool and dataset. + * dmu_objset_{hold,rele}() are convenience routines that also do the pool + * hold/rele. + */ + +int +dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) +{ + spa_t *spa; + int error; + + error = spa_open(name, &spa, tag); + if (error == 0) { + *dp = spa_get_dsl(spa); + dsl_pool_config_enter(*dp, tag); + } + return (error); +} + +void +dsl_pool_rele(dsl_pool_t *dp, void *tag) +{ + dsl_pool_config_exit(dp, tag); + spa_close(dp->dp_spa, tag); +} + +void +dsl_pool_config_enter(dsl_pool_t *dp, void *tag) +{ + /* + * We use a "reentrant" reader-writer lock, but not reentrantly. + * + * The rrwlock can (with the track_all flag) track all reading threads, + * which is very useful for debugging which code path failed to release + * the lock, and for verifying that the *current* thread does hold + * the lock. + * + * (Unlike a rwlock, which knows that N threads hold it for + * read, but not *which* threads, so rw_held(RW_READER) returns TRUE + * if any thread holds it for read, even if this thread doesn't). + */ + ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); + rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); +} + +void +dsl_pool_config_exit(dsl_pool_t *dp, void *tag) +{ + rrw_exit(&dp->dp_config_rwlock, tag); +} + +boolean_t +dsl_pool_config_held(dsl_pool_t *dp) +{ + return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c index aa66b32e7..0c288b5fd 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -51,16 +52,16 @@ dodefault(const char *propname, int intsz, int numints, void *buf) */ if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL || (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop))) - return (ENOENT); + return (SET_ERROR(ENOENT)); if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { if (intsz != 1) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); (void) strncpy(buf, zfs_prop_default_string(prop), numints); } else { if (intsz != 8 || numints < 1) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); *(uint64_t *)buf = zfs_prop_default_numeric(prop); } @@ -81,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, char *inheritstr; char *recvdstr; - ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); + ASSERT(dsl_pool_config_held(dd->dd_pool)); if (setpoint) setpoint[0] = '\0'; @@ -96,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, * after this loop. */ for (; dd != NULL; dd = dd->dd_parent) { - ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); - if (dd != target || snapshot) { if (!inheritable) break; @@ -145,7 +144,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, * at the end of the loop (instead of at the beginning) ensures * that err has a valid post-loop value. */ - err = ENOENT; + err = SET_ERROR(ENOENT); } if (err == ENOENT) @@ -166,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname, boolean_t snapshot; uint64_t zapobj; - ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock)); + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)); zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj); @@ -234,18 +233,12 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, uint64_t value; dsl_prop_cb_record_t *cbr; int err; - int need_rwlock; - need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock); - if (need_rwlock) - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); - err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL); - if (err != 0) { - if (need_rwlock) - rw_exit(&dp->dp_config_rwlock); + err = dsl_prop_get_int_ds(ds, propname, &value); + if (err != 0) return (err); - } cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP); cbr->cbr_ds = ds; @@ -258,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, mutex_exit(&dd->dd_lock); cbr->cbr_func(cbr->cbr_arg, value); - - if (need_rwlock) - rw_exit(&dp->dp_config_rwlock); return (0); } @@ -268,19 +258,18 @@ int dsl_prop_get(const char *dsname, const char *propname, int intsz, int numints, void *buf, char *setpoint) { - dsl_dataset_t *ds; - int err; + objset_t *os; + int error; - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); + error = dmu_objset_hold(dsname, FTAG, &os); + if (error != 0) + return (error); - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + error = dsl_prop_get_ds(dmu_objset_ds(os), propname, + intsz, numints, buf, setpoint); - dsl_dataset_rele(ds, FTAG); - return (err); + dmu_objset_rele(os, FTAG); + return (error); } /* @@ -298,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname, return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint)); } -void -dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, - zprop_source_t source, uint64_t *value) +int +dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname, + uint64_t *valuep) { - psa->psa_name = propname; - psa->psa_source = source; - psa->psa_intsz = 8; - psa->psa_numints = 1; - psa->psa_value = value; - - psa->psa_effective_value = -1ULL; + return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL)); } /* @@ -322,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, * a property not handled by this function. */ int -dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) +dsl_prop_predict(dsl_dir_t *dd, const char *propname, + zprop_source_t source, uint64_t value, uint64_t *newvalp) { - const char *propname = psa->psa_name; zfs_prop_t prop = zfs_name_to_prop(propname); - zprop_source_t source = psa->psa_source; objset_t *mos; uint64_t zapobj; uint64_t version; @@ -358,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) switch (source) { case ZPROP_SRC_NONE: /* Revert to the received value, if any. */ - err = zap_lookup(mos, zapobj, recvdstr, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = 0; + *newvalp = 0; break; case ZPROP_SRC_LOCAL: - psa->psa_effective_value = *(uint64_t *)psa->psa_value; + *newvalp = value; break; case ZPROP_SRC_RECEIVED: /* * If there's no local setting, then the new received value will * be the effective value. */ - err = zap_lookup(mos, zapobj, propname, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = *(uint64_t *)psa->psa_value; + *newvalp = value; break; case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): /* * We're clearing the received value, so the local setting (if * it exists) remains the effective value. */ - err = zap_lookup(mos, zapobj, propname, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = 0; + *newvalp = 0; break; default: - cmn_err(CE_PANIC, "unexpected property source: %d", source); + panic("unexpected property source: %d", source); } strfree(recvdstr); @@ -398,37 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) return (err); } -#ifdef ZFS_DEBUG -void -dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa) -{ - zfs_prop_t prop = zfs_name_to_prop(psa->psa_name); - uint64_t intval; - char setpoint[MAXNAMELEN]; - uint64_t version = spa_version(dd->dd_pool->dp_spa); - int err; - - if (version < SPA_VERSION_RECVD_PROPS) { - switch (prop) { - case ZFS_PROP_QUOTA: - case ZFS_PROP_RESERVATION: - return; - } - } - - err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval, - setpoint, B_FALSE); - if (err == 0 && intval != psa->psa_effective_value) { - cmn_err(CE_PANIC, "%s property, source: %x, " - "predicted effective value: %llu, " - "actual effective value: %llu (setpoint: %s)", - psa->psa_name, psa->psa_source, - (unsigned long long)psa->psa_effective_value, - (unsigned long long)intval, setpoint); - } -} -#endif - /* * Unregister this callback. Return 0 on success, ENOENT if ddname is * invalid, ENOMSG if no matching callback registered. @@ -452,7 +400,7 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, if (cbr == NULL) { mutex_exit(&dd->dd_lock); - return (ENOMSG); + return (SET_ERROR(ENOMSG)); } list_remove(&dd->dd_prop_cbs, cbr); @@ -463,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, return (0); } -/* - * Return the number of callbacks that are registered for this dataset. - */ -int -dsl_prop_numcb(dsl_dataset_t *ds) +boolean_t +dsl_prop_hascb(dsl_dataset_t *ds) { dsl_dir_t *dd = ds->ds_dir; + boolean_t rv = B_FALSE; dsl_prop_cb_record_t *cbr; - int num = 0; mutex_enter(&dd->dd_lock); - for (cbr = list_head(&dd->dd_prop_cbs); - cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { - if (cbr->cbr_ds == ds) - num++; + for (cbr = list_head(&dd->dd_prop_cbs); cbr; + cbr = list_next(&dd->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds) { + rv = B_TRUE; + break; + } + } + mutex_exit(&dd->dd_lock); + return (rv); +} + +/* ARGSUSED */ +static int +dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_cb_record_t *cbr; + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); cbr; + cbr = list_next(&dd->dd_prop_cbs, cbr)) { + uint64_t value; + + if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname, + sizeof (value), 1, &value, NULL) == 0) + cbr->cbr_func(cbr->cbr_arg, value); } mutex_exit(&dd->dd_lock); - return (num); + return (0); +} + +/* + * Update all property values for ddobj & its descendants. This is used + * when renaming the dir. + */ +void +dsl_prop_notify_all(dsl_dir_t *dd) +{ + dsl_pool_t *dp = dd->dd_pool; + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + (void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb, + NULL, DS_FIND_CHILDREN); } static void @@ -495,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, zap_attribute_t *za; int err; - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); - err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); if (err) return; @@ -507,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, */ err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname); if (err == 0) { - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); return; } ASSERT3U(err, ==, ENOENT); @@ -542,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, } kmem_free(za, sizeof (zap_attribute_t)); zap_cursor_fini(&zc); - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); } void -dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, + zprop_source_t source, int intsz, int numints, const void *value, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t zapobj, intval, dummy; int isint; char valbuf[32]; - char *valstr = NULL; + const char *valstr = NULL; char *inheritstr; char *recvdstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); - const char *propname = psa->psa_name; - zprop_source_t source = psa->psa_source; isint = (dodefault(propname, 8, 1, &intval) == 0); @@ -611,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); - VERIFY(0 == zap_update(mos, zapobj, propname, - psa->psa_intsz, psa->psa_numints, psa->psa_value, tx)); + VERIFY0(zap_update(mos, zapobj, propname, + intsz, numints, value, tx)); break; case ZPROP_SRC_INHERITED: /* @@ -623,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && - dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, - NULL) == 0) { + dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) { dummy = 0; - err = zap_update(mos, zapobj, inheritstr, - 8, 1, &dummy, tx); - ASSERT(err == 0); + VERIFY0(zap_update(mos, zapobj, inheritstr, + 8, 1, &dummy, tx)); } break; case ZPROP_SRC_RECEIVED: @@ -636,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) * set propname$recvd -> value */ err = zap_update(mos, zapobj, recvdstr, - psa->psa_intsz, psa->psa_numints, psa->psa_value, tx); + intsz, numints, value, tx); ASSERT(err == 0); break; case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): @@ -666,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) strfree(recvdstr); if (isint) { - VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL)); + VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { dsl_prop_cb_record_t *cbr; @@ -693,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) valstr = valbuf; } else { if (source == ZPROP_SRC_LOCAL) { - valstr = (char *)psa->psa_value; + valstr = value; } else { tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); if (dsl_prop_get_ds(ds, propname, 1, @@ -702,146 +678,81 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } - spa_history_log_internal((source == ZPROP_SRC_NONE || - source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : - LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, - "%s=%s dataset = %llu", propname, - (valstr == NULL ? "" : valstr), ds->ds_object); + spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx, + "%s=%s", propname, (valstr == NULL ? "" : valstr)); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); } -void -dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +int +dsl_prop_set_int(const char *dsname, const char *propname, + zprop_source_t source, uint64_t value) { - dsl_dataset_t *ds = arg1; - dsl_props_arg_t *pa = arg2; - nvlist_t *props = pa->pa_props; - dsl_prop_setarg_t psa; - nvpair_t *elem = NULL; - - psa.psa_source = pa->pa_source; - - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - nvpair_t *pair = elem; - - psa.psa_name = nvpair_name(pair); - - if (nvpair_type(pair) == DATA_TYPE_NVLIST) { - /* - * dsl_prop_get_all_impl() returns properties in this - * format. - */ - nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &pair) == 0); - } + nvlist_t *nvl = fnvlist_alloc(); + int error; - if (nvpair_type(pair) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(pair, - (char **)&psa.psa_value) == 0); - psa.psa_intsz = 1; - psa.psa_numints = strlen(psa.psa_value) + 1; - } else { - uint64_t intval; - VERIFY(nvpair_value_uint64(pair, &intval) == 0); - psa.psa_intsz = sizeof (intval); - psa.psa_numints = 1; - psa.psa_value = &intval; - } - dsl_prop_set_sync(ds, &psa, tx); - } + fnvlist_add_uint64(nvl, propname, value); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); } -void -dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx) +int +dsl_prop_set_string(const char *dsname, const char *propname, + zprop_source_t source, const char *value) { - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t zapobj = dd->dd_phys->dd_props_zapobj; - - ASSERT(dmu_tx_is_syncing(tx)); - - VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); + nvlist_t *nvl = fnvlist_alloc(); + int error; - dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); - - spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, - "%s=%llu dataset = %llu", name, (u_longlong_t)val, - dd->dd_phys->dd_head_dataset_obj); + fnvlist_add_string(nvl, propname, value); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); } int -dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, - int intsz, int numints, const void *buf) +dsl_prop_inherit(const char *dsname, const char *propname, + zprop_source_t source) { - dsl_dataset_t *ds; - uint64_t version; - int err; - dsl_prop_setarg_t psa; - - /* - * We must do these checks before we get to the syncfunc, since - * it can't fail. - */ - if (strlen(propname) >= ZAP_MAXNAMELEN) - return (ENAMETOOLONG); - - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - - version = spa_version(ds->ds_dir->dd_pool->dp_spa); - if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ? - ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { - dsl_dataset_rele(ds, FTAG); - return (E2BIG); - } - if (dsl_dataset_is_snapshot(ds) && - version < SPA_VERSION_SNAP_PROPS) { - dsl_dataset_rele(ds, FTAG); - return (ENOTSUP); - } - - psa.psa_name = propname; - psa.psa_source = source; - psa.psa_intsz = intsz; - psa.psa_numints = numints; - psa.psa_value = buf; - psa.psa_effective_value = -1ULL; + nvlist_t *nvl = fnvlist_alloc(); + int error; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - NULL, dsl_prop_set_sync, ds, &psa, 2); - - dsl_dataset_rele(ds, FTAG); - return (err); + fnvlist_add_boolean(nvl, propname); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); } -int -dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) +typedef struct dsl_props_set_arg { + const char *dpsa_dsname; + zprop_source_t dpsa_source; + nvlist_t *dpsa_props; +} dsl_props_set_arg_t; + +static int +dsl_props_set_check(void *arg, dmu_tx_t *tx) { + dsl_props_set_arg_t *dpsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t version; nvpair_t *elem = NULL; - dsl_props_arg_t pa; int err; - if (err = dsl_dataset_hold(dsname, FTAG, &ds)) + err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds); + if (err != 0) return (err); - /* - * Do these checks before the syncfunc, since it can't fail. - */ + version = spa_version(ds->ds_dir->dd_pool->dp_spa); - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) { if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { dsl_dataset_rele(ds, FTAG); - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); } if (nvpair_type(elem) == DATA_TYPE_STRING) { - char *valstr; - VERIFY(nvpair_value_string(elem, &valstr) == 0); + char *valstr = fnvpair_value_string(elem); if (strlen(valstr) >= (version < SPA_VERSION_STMF_PROP ? ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { @@ -851,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) } } - if (dsl_dataset_is_snapshot(ds) && - version < SPA_VERSION_SNAP_PROPS) { + if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) { dsl_dataset_rele(ds, FTAG); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } + dsl_dataset_rele(ds, FTAG); + return (0); +} + +void +dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source, + nvlist_t *props, dmu_tx_t *tx) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + nvpair_t *pair = elem; + + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + /* + * dsl_prop_get_all_impl() returns properties in this + * format. + */ + nvlist_t *attrs = fnvpair_value_nvlist(pair); + pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE); + } - pa.pa_props = props; - pa.pa_source = source; + if (nvpair_type(pair) == DATA_TYPE_STRING) { + const char *value = fnvpair_value_string(pair); + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, 1, strlen(value) + 1, value, tx); + } else if (nvpair_type(pair) == DATA_TYPE_UINT64) { + uint64_t intval = fnvpair_value_uint64(pair); + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, sizeof (intval), 1, &intval, tx); + } else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) { + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, 0, 0, NULL, tx); + } else { + panic("invalid nvpair type"); + } + } +} - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - NULL, dsl_props_set_sync, ds, &pa, 2); +static void +dsl_props_set_sync(void *arg, dmu_tx_t *tx) +{ + dsl_props_set_arg_t *dpsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds)); + dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx); dsl_dataset_rele(ds, FTAG); - return (err); +} + +/* + * All-or-nothing; if any prop can't be set, nothing will be modified. + */ +int +dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) +{ + dsl_props_set_arg_t dpsa; + int nblks = 0; + + dpsa.dpsa_dsname = dsname; + dpsa.dpsa_source = source; + dpsa.dpsa_props = props; + + /* + * If the source includes NONE, then we will only be removing entries + * from the ZAP object. In that case don't check for ENOSPC. + */ + if ((source & ZPROP_SRC_NONE) == 0) + nblks = 2 * fnvlist_num_pairs(props); + + return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync, + &dpsa, nblks)); } typedef enum dsl_prop_getflags { @@ -1014,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, if (dsl_dataset_is_snapshot(ds)) flags |= DSL_PROP_GET_SNAPSHOT; - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); if (ds->ds_phys->ds_props_obj != 0) { ASSERT(flags & DSL_PROP_GET_SNAPSHOT); @@ -1039,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, break; } out: - rw_exit(&dp->dp_config_rwlock); return (err); } boolean_t -dsl_prop_get_hasrecvd(objset_t *os) +dsl_prop_get_hasrecvd(const char *dsname) { - dsl_dataset_t *ds = os->os_dsl_dataset; - int rc; uint64_t dummy; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); - ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); - return (rc == 0); + return (0 == + dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL)); } -static void -dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source) +static int +dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source) { - dsl_dataset_t *ds = os->os_dsl_dataset; - uint64_t dummy = 0; - dsl_prop_setarg_t psa; - - if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS) - return; + uint64_t version; + spa_t *spa; + int error = 0; - dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy); + VERIFY0(spa_open(dsname, &spa, FTAG)); + version = spa_version(spa); + spa_close(spa, FTAG); - (void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL, - dsl_prop_set_sync, ds, &psa, 2); + if (version >= SPA_VERSION_RECVD_PROPS) + error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0); + return (error); } /* * Call after successfully receiving properties to ensure that only the first * receive on or after SPA_VERSION_RECVD_PROPS blows away local properties. */ -void -dsl_prop_set_hasrecvd(objset_t *os) +int +dsl_prop_set_hasrecvd(const char *dsname) { - if (dsl_prop_get_hasrecvd(os)) { - ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); - return; - } - dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL); + int error = 0; + if (!dsl_prop_get_hasrecvd(dsname)) + error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL); + return (error); } void -dsl_prop_unset_hasrecvd(objset_t *os) +dsl_prop_unset_hasrecvd(const char *dsname) { - dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE); + VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE)); } int @@ -1100,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp) } int -dsl_prop_get_received(objset_t *os, nvlist_t **nvp) +dsl_prop_get_received(const char *dsname, nvlist_t **nvp) { + objset_t *os; + int error; + /* * Received properties are not distinguishable from local properties * until the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. */ - dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ? + dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ? DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL); - return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags)); + + error = dmu_objset_hold(dsname, FTAG, &os); + if (error != 0) + return (error); + error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags); + dmu_objset_rele(os, FTAG); + return (error); } void diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index 241d596de..7f9fccce5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -55,7 +55,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); static scan_cb_t dsl_scan_defrag_cb; static scan_cb_t dsl_scan_scrub_cb; static scan_cb_t dsl_scan_remove_cb; -static dsl_syncfunc_t dsl_scan_cancel_sync; +static void dsl_scan_cancel_sync(void *, dmu_tx_t *); static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ @@ -184,22 +184,21 @@ dsl_scan_fini(dsl_pool_t *dp) /* ARGSUSED */ static int -dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_setup_check(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (scn->scn_phys.scn_state == DSS_SCANNING) - return (EBUSY); + return (SET_ERROR(EBUSY)); return (0); } -/* ARGSUSED */ static void -dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; - pool_scan_func_t *funcp = arg2; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; @@ -258,7 +257,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_scan_sync_state(scn, tx); - spa_history_log_internal(LOG_POOL_SCAN, spa, tx, + spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } @@ -307,7 +306,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) else scn->scn_phys.scn_state = DSS_CANCELED; - spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, + spa_history_log_internal(spa, "scan done", tx, "complete=%u", complete); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { @@ -345,20 +344,20 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) /* ARGSUSED */ static int -dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_cancel_check(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (scn->scn_phys.scn_state != DSS_SCANNING) - return (ENOENT); + return (SET_ERROR(ENOENT)); return (0); } /* ARGSUSED */ static void -dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; dsl_scan_done(scn, B_FALSE, tx); dsl_scan_sync_state(scn, tx); @@ -367,12 +366,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) int dsl_scan_cancel(dsl_pool_t *dp) { - boolean_t complete = B_FALSE; - int err; - - err = dsl_sync_task_do(dp, dsl_scan_cancel_check, - dsl_scan_cancel_sync, dp->dp_scan, &complete, 3); - return (err); + return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check, + dsl_scan_cancel_sync, NULL, 3)); } static void dsl_scan_visitbp(blkptr_t *bp, @@ -408,7 +403,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds) static void dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) { - VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset, + VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys, tx)); @@ -980,33 +975,33 @@ struct enqueue_clones_arg { /* ARGSUSED */ static int -enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { struct enqueue_clones_arg *eca = arg; dsl_dataset_t *ds; int err; - dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj) + return (0); + + err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); - if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) { - while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { - dsl_dataset_t *prev; - err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { + dsl_dataset_t *prev; + err = dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); - dsl_dataset_rele(ds, FTAG); - if (err) - return (err); - ds = prev; - } - VERIFY(zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds->ds_object, - ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); + dsl_dataset_rele(ds, FTAG); + if (err) + return (err); + ds = prev; } + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, + ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); dsl_dataset_rele(ds, FTAG); return (0); } @@ -1095,17 +1090,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) } if (usenext) { - VERIFY(zap_join_key(dp->dp_meta_objset, + VERIFY0(zap_join_key(dp->dp_meta_objset, ds->ds_phys->ds_next_clones_obj, scn->scn_phys.scn_queue_obj, - ds->ds_phys->ds_creation_txg, tx) == 0); + ds->ds_phys->ds_creation_txg, tx)); } else { struct enqueue_clones_arg eca; eca.tx = tx; eca.originobj = ds->ds_object; - (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa, - NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN); + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + enqueue_clones_cb, &eca, DS_FIND_CHILDREN)); } } @@ -1115,15 +1110,14 @@ out: /* ARGSUSED */ static int -enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds; int err; - dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); @@ -1278,8 +1272,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { - VERIFY(0 == dmu_objset_find_spa(dp->dp_spa, - NULL, enqueue_cb, tx, DS_FIND_CHILDREN)); + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + enqueue_cb, tx, DS_FIND_CHILDREN)); } else { dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); @@ -1357,7 +1351,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { if (dsl_scan_free_should_pause(scn)) - return (ERESTART); + return (SET_ERROR(ERESTART)); } zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, @@ -1414,7 +1408,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u txg=%llu", func, tx->tx_txg); - dsl_scan_setup_sync(scn, &func, tx); + dsl_scan_setup_sync(&func, tx); } if (!dsl_scan_active(scn) || @@ -1448,21 +1442,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) err = bptree_iterate(dp->dp_meta_objset, dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx); - VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); - if (err != 0) - return; - - /* disable async destroy feature */ - spa_feature_decr(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx); - ASSERT(!spa_feature_is_active(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])); - VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, tx)); - VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset, - dp->dp_bptree_obj, tx)); - dp->dp_bptree_obj = 0; + VERIFY0(zio_wait(scn->scn_zio_root)); + + if (err == 0) { + zfeature_info_t *feat = &spa_feature_table + [SPA_FEATURE_ASYNC_DESTROY]; + /* finished; deactivate async destroy feature */ + spa_feature_decr(spa, feat, tx); + ASSERT(!spa_feature_is_active(spa, feat)); + VERIFY0(zap_remove(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_BPTREE_OBJ, tx)); + VERIFY0(bptree_free(dp->dp_meta_objset, + dp->dp_bptree_obj, tx)); + dp->dp_bptree_obj = 0; + } } if (scn->scn_visited_this_txg) { zfs_dbgmsg("freed %llu blocks in %llums from " @@ -1509,7 +1503,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); + dsl_pool_config_enter(dp, FTAG); dsl_scan_visit(scn, tx); + dsl_pool_config_exit(dp, FTAG); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; @@ -1745,6 +1741,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); - return (dsl_sync_task_do(dp, dsl_scan_setup_check, - dsl_scan_setup_sync, dp->dp_scan, &func, 0)); + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, + dsl_scan_setup_sync, &func, 0)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c index b4a3798b4..58f7d3795 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -33,135 +34,115 @@ /* ARGSUSED */ static int -dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_null_checkfunc(void *arg, dmu_tx_t *tx) { return (0); } -dsl_sync_task_group_t * -dsl_sync_task_group_create(dsl_pool_t *dp) -{ - dsl_sync_task_group_t *dstg; - - dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP); - list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t), - offsetof(dsl_sync_task_t, dst_node)); - dstg->dstg_pool = dp; - - return (dstg); -} - -void -dsl_sync_task_create(dsl_sync_task_group_t *dstg, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified) -{ - dsl_sync_task_t *dst; - - if (checkfunc == NULL) - checkfunc = dsl_null_checkfunc; - dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP); - dst->dst_checkfunc = checkfunc; - dst->dst_syncfunc = syncfunc; - dst->dst_arg1 = arg1; - dst->dst_arg2 = arg2; - list_insert_tail(&dstg->dstg_tasks, dst); - - dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT; -} - +/* + * Called from open context to perform a callback in syncing context. Waits + * for the operation to complete. + * + * The checkfunc will be called from open context as a preliminary check + * which can quickly fail. If it succeeds, it will be called again from + * syncing context. The checkfunc should generally be designed to work + * properly in either context, but if necessary it can check + * dmu_tx_is_syncing(tx). + * + * The synctask infrastructure enforces proper locking strategy with respect + * to the dp_config_rwlock -- the lock will always be held when the callbacks + * are called. It will be held for read during the open-context (preliminary) + * call to the checkfunc, and then held for write from syncing context during + * the calls to the check and sync funcs. + * + * A dataset or pool name can be passed as the first argument. Typically, + * the check func will hold, check the return value of the hold, and then + * release the dataset. The sync func will VERIFYO(hold()) the dataset. + * This is safe because no changes can be made between the check and sync funcs, + * and the sync func will only be called if the check func successfully opened + * the dataset. + */ int -dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) +dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified) { + spa_t *spa; dmu_tx_t *tx; - uint64_t txg; - dsl_sync_task_t *dst; + int err; + dsl_sync_task_t dst = { 0 }; + dsl_pool_t *dp; -top: - tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir); - VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); - - txg = dmu_tx_get_txg(tx); - - /* Do a preliminary error check. */ - dstg->dstg_err = 0; - rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - continue; -#endif - dst->dst_err = - dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); - if (dst->dst_err) - dstg->dstg_err = dst->dst_err; - } - rw_exit(&dstg->dstg_pool->dp_config_rwlock); + err = spa_open(pool, &spa, FTAG); + if (err != 0) + return (err); + dp = spa_get_dsl(spa); - if (dstg->dstg_err) { +top: + tx = dmu_tx_create_dd(dp->dp_mos_dir); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); + + dst.dst_pool = dp; + dst.dst_txg = dmu_tx_get_txg(tx); + dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT; + dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc; + dst.dst_syncfunc = syncfunc; + dst.dst_arg = arg; + dst.dst_error = 0; + dst.dst_nowaiter = B_FALSE; + + dsl_pool_config_enter(dp, FTAG); + err = dst.dst_checkfunc(arg, tx); + dsl_pool_config_exit(dp, FTAG); + + if (err != 0) { dmu_tx_commit(tx); - return (dstg->dstg_err); + spa_close(spa, FTAG); + return (err); } - /* - * We don't generally have many sync tasks, so pay the price of - * add_tail to get the tasks executed in the right order. - */ - VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, - dstg, txg)); + VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg)); dmu_tx_commit(tx); - txg_wait_synced(dstg->dstg_pool, txg); + txg_wait_synced(dp, dst.dst_txg); - if (dstg->dstg_err == EAGAIN) { - txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE); + if (dst.dst_error == EAGAIN) { + txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE); goto top; } - return (dstg->dstg_err); + spa_close(spa, FTAG); + return (dst.dst_error); } void -dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, + int blocks_modified, dmu_tx_t *tx) { - uint64_t txg; - - dstg->dstg_nowaiter = B_TRUE; - txg = dmu_tx_get_txg(tx); - /* - * We don't generally have many sync tasks, so pay the price of - * add_tail to get the tasks executed in the right order. - */ - VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, - dstg, txg)); -} + dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP); -void -dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg) -{ - dsl_sync_task_t *dst; + dst->dst_pool = dp; + dst->dst_txg = dmu_tx_get_txg(tx); + dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT; + dst->dst_checkfunc = dsl_null_checkfunc; + dst->dst_syncfunc = syncfunc; + dst->dst_arg = arg; + dst->dst_error = 0; + dst->dst_nowaiter = B_TRUE; - while (dst = list_head(&dstg->dstg_tasks)) { - list_remove(&dstg->dstg_tasks, dst); - kmem_free(dst, sizeof (dsl_sync_task_t)); - } - kmem_free(dstg, sizeof (dsl_sync_task_group_t)); + VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg)); } +/* + * Called in syncing context to execute the synctask. + */ void -dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx) { - dsl_sync_task_t *dst; - dsl_pool_t *dp = dstg->dstg_pool; + dsl_pool_t *dp = dst->dst_pool; uint64_t quota, used; - ASSERT0(dstg->dstg_err); + ASSERT0(dst->dst_error); /* * Check for sufficient space. We just check against what's @@ -173,63 +154,21 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); used = dp->dp_root_dir->dd_phys->dd_used_bytes; /* MOS space is triple-dittoed, so we multiply by 3. */ - if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) { - dstg->dstg_err = ENOSPC; + if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) { + dst->dst_error = SET_ERROR(ENOSPC); + if (dst->dst_nowaiter) + kmem_free(dst, sizeof (*dst)); return; } /* - * Check for errors by calling checkfuncs. + * Check for errors by calling checkfunc. */ - rw_enter(&dp->dp_config_rwlock, RW_WRITER); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - dst->dst_err = - dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); - if (dst->dst_err) - dstg->dstg_err = dst->dst_err; - } - - if (dstg->dstg_err == 0) { - /* - * Execute sync tasks. - */ - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx); - } - } - rw_exit(&dp->dp_config_rwlock); - - if (dstg->dstg_nowaiter) - dsl_sync_task_group_destroy(dstg); -} - -int -dsl_sync_task_do(dsl_pool_t *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified) -{ - dsl_sync_task_group_t *dstg; - int err; - - ASSERT(spa_writeable(dp->dp_spa)); - - dstg = dsl_sync_task_group_create(dp); - dsl_sync_task_create(dstg, checkfunc, syncfunc, - arg1, arg2, blocks_modified); - err = dsl_sync_task_group_wait(dstg); - dsl_sync_task_group_destroy(dstg); - return (err); -} - -void -dsl_sync_task_do_nowait(dsl_pool_t *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx) -{ - dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp); - dsl_sync_task_create(dstg, checkfunc, syncfunc, - arg1, arg2, blocks_modified); - dsl_sync_task_group_nowait(dstg, tx); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx); + if (dst->dst_error == 0) + dst->dst_syncfunc(dst->dst_arg, tx); + rrw_exit(&dp->dp_config_rwlock, FTAG); + if (dst->dst_nowaiter) + kmem_free(dst, sizeof (*dst)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c new file mode 100644 index 000000000..fa9d93708 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c @@ -0,0 +1,536 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct dsl_dataset_user_hold_arg { + nvlist_t *dduha_holds; + nvlist_t *dduha_errlist; + minor_t dduha_minor; +} dsl_dataset_user_hold_arg_t; + +/* + * If you add new checks here, you may need to add additional checks to the + * "temporary" case in snapshot_check() in dmu_objset.c. + */ +int +dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag, + boolean_t temphold, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + int error = 0; + + if (strlen(htag) > MAXNAMELEN) + return (E2BIG); + /* Tempholds have a more restricted length */ + if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + return (E2BIG); + + /* tags must be unique (if ds already exists) */ + if (ds != NULL) { + mutex_enter(&ds->ds_lock); + if (ds->ds_phys->ds_userrefs_obj != 0) { + uint64_t value; + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + htag, 8, 1, &value); + if (error == 0) + error = SET_ERROR(EEXIST); + else if (error == ENOENT) + error = 0; + } + mutex_exit(&ds->ds_lock); + } + + return (error); +} + +static int +dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_hold_arg_t *dduha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; + + if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) + return (SET_ERROR(ENOTSUP)); + + for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + int error = 0; + dsl_dataset_t *ds; + char *htag; + + /* must be a snapshot */ + if (strchr(nvpair_name(pair), '@') == NULL) + error = SET_ERROR(EINVAL); + + if (error == 0) + error = nvpair_value_string(pair, &htag); + if (error == 0) { + error = dsl_dataset_hold(dp, + nvpair_name(pair), FTAG, &ds); + } + if (error == 0) { + error = dsl_dataset_user_hold_check_one(ds, htag, + dduha->dduha_minor != 0, tx); + dsl_dataset_rele(ds, FTAG); + } + + if (error != 0) { + rv = error; + fnvlist_add_int32(dduha->dduha_errlist, + nvpair_name(pair), error); + } + } + return (rv); +} + +void +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, + minor_t minor, uint64_t now, dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj; + + mutex_enter(&ds->ds_lock); + if (ds->ds_phys->ds_userrefs_obj == 0) { + /* + * This is the first user hold for this dataset. Create + * the userrefs zap object. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + zapobj = ds->ds_phys->ds_userrefs_obj = + zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); + } else { + zapobj = ds->ds_phys->ds_userrefs_obj; + } + ds->ds_userrefs++; + mutex_exit(&ds->ds_lock); + + VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx)); + + if (minor != 0) { + VERIFY0(dsl_pool_user_hold(dp, ds->ds_object, + htag, now, tx)); + dsl_register_onexit_hold_cleanup(ds, htag, minor); + } + + spa_history_log_internal_ds(ds, "hold", tx, + "tag=%s temp=%d refs=%llu", + htag, minor != 0, ds->ds_userrefs); +} + +static void +dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_hold_arg_t *dduha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + uint64_t now = gethrestime_sec(); + + for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair), + dduha->dduha_minor, now, tx); + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * holds is nvl of snapname -> holdname + * errlist will be filled in with snapname -> error + * if cleanup_minor is not 0, the holds will be temporary, cleaned up + * when the process exits. + * + * if any fails, all will fail. + */ +int +dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) +{ + dsl_dataset_user_hold_arg_t dduha; + nvpair_t *pair; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + dduha.dduha_holds = holds; + dduha.dduha_errlist = errlist; + dduha.dduha_minor = cleanup_minor; + + return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, + dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds))); +} + +typedef struct dsl_dataset_user_release_arg { + nvlist_t *ddura_holds; + nvlist_t *ddura_todelete; + nvlist_t *ddura_errlist; +} dsl_dataset_user_release_arg_t; + +static int +dsl_dataset_user_release_check_one(dsl_dataset_t *ds, + nvlist_t *holds, boolean_t *todelete) +{ + uint64_t zapobj; + nvpair_t *pair; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + int error; + int numholds = 0; + + *todelete = B_FALSE; + + if (!dsl_dataset_is_snapshot(ds)) + return (SET_ERROR(EINVAL)); + + zapobj = ds->ds_phys->ds_userrefs_obj; + if (zapobj == 0) + return (SET_ERROR(ESRCH)); + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + /* Make sure the hold exists */ + uint64_t tmp; + error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp); + if (error == ENOENT) + error = SET_ERROR(ESRCH); + if (error != 0) + return (error); + numholds++; + } + + if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && + ds->ds_userrefs == numholds) { + /* we need to destroy the snapshot as well */ + + if (dsl_dataset_long_held(ds)) + return (SET_ERROR(EBUSY)); + *todelete = B_TRUE; + } + return (0); +} + +static int +dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + const char *name = nvpair_name(pair); + int error; + dsl_dataset_t *ds; + nvlist_t *holds; + + error = nvpair_value_nvlist(pair, &holds); + if (error != 0) + return (SET_ERROR(EINVAL)); + + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error == 0) { + boolean_t deleteme; + error = dsl_dataset_user_release_check_one(ds, + holds, &deleteme); + if (error == 0 && deleteme) { + fnvlist_add_boolean(ddura->ddura_todelete, + name); + } + dsl_dataset_rele(ds, FTAG); + } + if (error != 0) { + if (ddura->ddura_errlist != NULL) { + fnvlist_add_int32(ddura->ddura_errlist, + name, error); + } + rv = error; + } + } + return (rv); +} + +static void +dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj; + int error; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + ds->ds_userrefs--; + error = dsl_pool_user_release(dp, ds->ds_object, + nvpair_name(pair), tx); + VERIFY(error == 0 || error == ENOENT); + zapobj = ds->ds_phys->ds_userrefs_obj; + VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx)); + + spa_history_log_internal_ds(ds, "release", tx, + "tag=%s refs=%lld", nvpair_name(pair), + (longlong_t)ds->ds_userrefs); + } +} + +static void +dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_dataset_user_release_sync_one(ds, + fnvpair_value_nvlist(pair), tx); + if (nvlist_exists(ddura->ddura_todelete, + nvpair_name(pair))) { + ASSERT(ds->ds_userrefs == 0 && + ds->ds_phys->ds_num_children == 1 && + DS_IS_DEFER_DESTROY(ds)); + dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); + } + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * holds is nvl of snapname -> { holdname, ... } + * errlist will be filled in with snapname -> error + * + * if any fails, all will fail. + */ +int +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) +{ + dsl_dataset_user_release_arg_t ddura; + nvpair_t *pair; + int error; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + ddura.ddura_holds = holds; + ddura.ddura_errlist = errlist; + ddura.ddura_todelete = fnvlist_alloc(); + + error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check, + dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds)); + fnvlist_free(ddura.ddura_todelete); + return (error); +} + +typedef struct dsl_dataset_user_release_tmp_arg { + uint64_t ddurta_dsobj; + nvlist_t *ddurta_holds; + boolean_t ddurta_deleteme; +} dsl_dataset_user_release_tmp_arg_t; + +static int +dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_tmp_arg_t *ddurta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds); + if (error) + return (error); + + error = dsl_dataset_user_release_check_one(ds, + ddurta->ddurta_holds, &ddurta->ddurta_deleteme); + dsl_dataset_rele(ds, FTAG); + return (error); +} + +static void +dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_tmp_arg_t *ddurta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds)); + dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx); + if (ddurta->ddurta_deleteme) { + ASSERT(ds->ds_userrefs == 0 && + ds->ds_phys->ds_num_children == 1 && + DS_IS_DEFER_DESTROY(ds)); + dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); + } + dsl_dataset_rele(ds, FTAG); +} + +/* + * Called at spa_load time to release a stale temporary user hold. + * Also called by the onexit code. + */ +void +dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag) +{ + dsl_dataset_user_release_tmp_arg_t ddurta; + dsl_dataset_t *ds; + int error; + +#ifdef _KERNEL + /* Make sure it is not mounted. */ + dsl_pool_config_enter(dp, FTAG); + error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (error == 0) { + char name[MAXNAMELEN]; + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + zfs_unmount_snap(name); + } else { + dsl_pool_config_exit(dp, FTAG); + } +#endif + + ddurta.ddurta_dsobj = dsobj; + ddurta.ddurta_holds = fnvlist_alloc(); + fnvlist_add_boolean(ddurta.ddurta_holds, htag); + + (void) dsl_sync_task(spa_name(dp->dp_spa), + dsl_dataset_user_release_tmp_check, + dsl_dataset_user_release_tmp_sync, &ddurta, 1); + fnvlist_free(ddurta.ddurta_holds); +} + +typedef struct zfs_hold_cleanup_arg { + char zhca_spaname[MAXNAMELEN]; + uint64_t zhca_spa_load_guid; + uint64_t zhca_dsobj; + char zhca_htag[MAXNAMELEN]; +} zfs_hold_cleanup_arg_t; + +static void +dsl_dataset_user_release_onexit(void *arg) +{ + zfs_hold_cleanup_arg_t *ca = arg; + spa_t *spa; + int error; + + error = spa_open(ca->zhca_spaname, &spa, FTAG); + if (error != 0) { + zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + "because pool is no longer loaded", + ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + return; + } + if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { + zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + "because pool is no longer loaded (guid doesn't match)", + ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + spa_close(spa, FTAG); + return; + } + + dsl_dataset_user_release_tmp(spa_get_dsl(spa), + ca->zhca_dsobj, ca->zhca_htag); + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + spa_close(spa, FTAG); +} + +void +dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, + minor_t minor) +{ + zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP); + spa_t *spa = dsl_dataset_get_spa(ds); + (void) strlcpy(ca->zhca_spaname, spa_name(spa), + sizeof (ca->zhca_spaname)); + ca->zhca_spa_load_guid = spa_load_guid(spa); + ca->zhca_dsobj = ds->ds_object; + (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag)); + VERIFY0(zfs_onexit_add_cb(minor, + dsl_dataset_user_release_onexit, ca, NULL)); +} + +int +dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + int err; + + err = dsl_pool_hold(dsname, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (ds->ds_phys->ds_userrefs_obj != 0) { + zap_attribute_t *za; + zap_cursor_t zc; + + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_userrefs_obj); + zap_cursor_retrieve(&zc, za) == 0; + zap_cursor_advance(&zc)) { + fnvlist_add_uint64(nvl, za->za_name, + za->za_first_integer); + } + zap_cursor_fini(&zc); + kmem_free(za, sizeof (zap_attribute_t)); + } + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (0); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index edc5cfa53..c19d792e2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */ @@ -1518,7 +1518,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, * For testing, make some blocks above a certain size be gang blocks. */ if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) - return (ENOSPC); + return (SET_ERROR(ENOSPC)); /* * Start at the rotor and loop through all mgs until we find something. @@ -1683,7 +1683,7 @@ next: bzero(&dva[d], sizeof (dva_t)); - return (ENOSPC); + return (SET_ERROR(ENOSPC)); } /* @@ -1752,7 +1752,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) if ((vd = vdev_lookup_top(spa, vdev)) == NULL || (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) - return (ENXIO); + return (SET_ERROR(ENXIO)); msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; @@ -1765,7 +1765,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); if (error == 0 && !space_map_contains(msp->ms_map, offset, size)) - error = ENOENT; + error = SET_ERROR(ENOENT); if (error || txg == 0) { /* txg == 0 indicates dry run */ mutex_exit(&msp->ms_lock); @@ -1800,7 +1800,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, if (mc->mc_rotor == NULL) { /* no vdevs in this class */ spa_config_exit(spa, SCL_ALLOC, FTAG); - return (ENOSPC); + return (SET_ERROR(ENOSPC)); } ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); @@ -1876,3 +1876,41 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) return (error); } + +static void +checkmap(space_map_t *sm, uint64_t off, uint64_t size) +{ + space_seg_t *ss; + avl_index_t where; + + mutex_enter(sm->sm_lock); + ss = space_map_find(sm, off, size, &where); + if (ss != NULL) + panic("freeing free block; ss=%p", (void *)ss); + mutex_exit(sm->sm_lock); +} + +void +metaslab_check_free(spa_t *spa, const blkptr_t *bp) +{ + if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0) + return; + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + for (int i = 0; i < BP_GET_NDVAS(bp); i++) { + uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[i]); + vdev_t *vd = vdev_lookup_top(spa, vdid); + uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[i]); + uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]); + metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift]; + + if (ms->ms_map->sm_loaded) + checkmap(ms->ms_map, off, size); + + for (int j = 0; j < TXG_SIZE; j++) + checkmap(ms->ms_freemap[j], off, size); + for (int j = 0; j < TXG_DEFER_SIZE; j++) + checkmap(ms->ms_defermap[j], off, size); + } + spa_config_exit(spa, SCL_VDEV, FTAG); +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c index 860c8c6ce..7e96e558c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */ #else int reference_tracking_enable = TRUE; #endif -int reference_history = 4; /* tunable */ +int reference_history = 3; /* tunable */ static kmem_cache_t *reference_cache; static kmem_cache_t *reference_history_cache; @@ -64,6 +65,14 @@ refcount_create(refcount_t *rc) offsetof(reference_t, ref_link)); rc->rc_count = 0; rc->rc_removed_count = 0; + rc->rc_tracked = reference_tracking_enable; +} + +void +refcount_create_untracked(refcount_t *rc) +{ + refcount_create(rc); + rc->rc_tracked = B_FALSE; } void @@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc) int refcount_is_zero(refcount_t *rc) { - ASSERT(rc->rc_count >= 0); return (rc->rc_count == 0); } int64_t refcount_count(refcount_t *rc) { - ASSERT(rc->rc_count >= 0); return (rc->rc_count); } @@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder) reference_t *ref = NULL; int64_t count; - if (reference_tracking_enable) { + if (rc->rc_tracked) { ref = kmem_cache_alloc(reference_cache, KM_SLEEP); ref->ref_holder = holder; ref->ref_number = number; } mutex_enter(&rc->rc_mtx); ASSERT(rc->rc_count >= 0); - if (reference_tracking_enable) + if (rc->rc_tracked) list_insert_head(&rc->rc_list, ref); rc->rc_count += number; count = rc->rc_count; @@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) mutex_enter(&rc->rc_mtx); ASSERT(rc->rc_count >= number); - if (!reference_tracking_enable) { + if (!rc->rc_tracked) { rc->rc_count -= number; count = rc->rc_count; mutex_exit(&rc->rc_mtx); @@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) KM_SLEEP); list_insert_head(&rc->rc_removed, ref); rc->rc_removed_count++; - if (rc->rc_removed_count >= reference_history) { + if (rc->rc_removed_count > reference_history) { ref = list_tail(&rc->rc_removed); list_remove(&rc->rc_removed, ref); kmem_cache_free(reference_history_cache, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c index 4cef53f95..8e80166c7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #include #include @@ -72,8 +75,9 @@ uint_t rrw_tsd_key; typedef struct rrw_node { - struct rrw_node *rn_next; - rrwlock_t *rn_rrl; + struct rrw_node *rn_next; + rrwlock_t *rn_rrl; + void *rn_tag; } rrw_node_t; static rrw_node_t * @@ -95,13 +99,14 @@ rrn_find(rrwlock_t *rrl) * Add a node to the head of the singly linked list. */ static void -rrn_add(rrwlock_t *rrl) +rrn_add(rrwlock_t *rrl, void *tag) { rrw_node_t *rn; rn = kmem_alloc(sizeof (*rn), KM_SLEEP); rn->rn_rrl = rrl; rn->rn_next = tsd_get(rrw_tsd_key); + rn->rn_tag = tag; VERIFY(tsd_set(rrw_tsd_key, rn) == 0); } @@ -110,7 +115,7 @@ rrn_add(rrwlock_t *rrl) * thread's list and return TRUE; otherwise return FALSE. */ static boolean_t -rrn_find_and_remove(rrwlock_t *rrl) +rrn_find_and_remove(rrwlock_t *rrl, void *tag) { rrw_node_t *rn; rrw_node_t *prev = NULL; @@ -119,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl) return (B_FALSE); for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { - if (rn->rn_rrl == rrl) { + if (rn->rn_rrl == rrl && rn->rn_tag == tag) { if (prev) prev->rn_next = rn->rn_next; else @@ -133,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl) } void -rrw_init(rrwlock_t *rrl) +rrw_init(rrwlock_t *rrl, boolean_t track_all) { mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL); @@ -141,6 +146,7 @@ rrw_init(rrwlock_t *rrl) refcount_create(&rrl->rr_anon_rcount); refcount_create(&rrl->rr_linked_rcount); rrl->rr_writer_wanted = B_FALSE; + rrl->rr_track_all = track_all; } void @@ -153,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl) refcount_destroy(&rrl->rr_linked_rcount); } -static void +void rrw_enter_read(rrwlock_t *rrl, void *tag) { mutex_enter(&rrl->rr_lock); #if !defined(DEBUG) && defined(_KERNEL) - if (!rrl->rr_writer && !rrl->rr_writer_wanted) { + if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted && + !rrl->rr_track_all) { rrl->rr_anon_rcount.rc_count++; mutex_exit(&rrl->rr_lock); return; @@ -168,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag) ASSERT(rrl->rr_writer != curthread); ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0); - while (rrl->rr_writer || (rrl->rr_writer_wanted && + while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted && refcount_is_zero(&rrl->rr_anon_rcount) && rrn_find(rrl) == NULL)) cv_wait(&rrl->rr_cv, &rrl->rr_lock); - if (rrl->rr_writer_wanted) { + if (rrl->rr_writer_wanted || rrl->rr_track_all) { /* may or may not be a re-entrant enter */ - rrn_add(rrl); + rrn_add(rrl, tag); (void) refcount_add(&rrl->rr_linked_rcount, tag); } else { (void) refcount_add(&rrl->rr_anon_rcount, tag); @@ -184,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag) mutex_exit(&rrl->rr_lock); } -static void +void rrw_enter_write(rrwlock_t *rrl) { mutex_enter(&rrl->rr_lock); @@ -230,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag) if (rrl->rr_writer == NULL) { int64_t count; - if (rrn_find_and_remove(rrl)) + if (rrn_find_and_remove(rrl, tag)) { count = refcount_remove(&rrl->rr_linked_rcount, tag); - else + } else { + ASSERT(!rrl->rr_track_all); count = refcount_remove(&rrl->rr_anon_rcount, tag); + } if (count == 0) cv_broadcast(&rrl->rr_cv); } else { @@ -246,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag) mutex_exit(&rrl->rr_lock); } +/* + * If the lock was created with track_all, rrw_held(RW_READER) will return + * B_TRUE iff the current thread has the lock for reader. Otherwise it may + * return B_TRUE if any thread has the lock for reader. + */ boolean_t rrw_held(rrwlock_t *rrl, krw_t rw) { @@ -256,9 +270,19 @@ rrw_held(rrwlock_t *rrl, krw_t rw) held = (rrl->rr_writer == curthread); } else { held = (!refcount_is_zero(&rrl->rr_anon_rcount) || - !refcount_is_zero(&rrl->rr_linked_rcount)); + rrn_find(rrl) != NULL); } mutex_exit(&rrl->rr_lock); return (held); } + +void +rrw_tsd_destroy(void *arg) +{ + rrw_node_t *rn = arg; + if (rn != NULL) { + panic("thread %p terminating with rrw lock %p held", + (void *)curthread, (void *)rn->rn_rrl); + } +} diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c index 1406d4d71..e96044a4e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 iXsystems, Inc - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -373,7 +373,7 @@ sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, switch (data_op) { case SA_LOOKUP: if (bulk[i].sa_addr == NULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); if (bulk[i].sa_data) { SA_COPY_DATA(bulk[i].sa_data_func, bulk[i].sa_addr, bulk[i].sa_data, @@ -503,7 +503,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) blocksize = SPA_MINBLOCKSIZE; } else if (size > SPA_MAXBLOCKSIZE) { ASSERT(0); - return (EFBIG); + return (SET_ERROR(EFBIG)); } else { blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); } @@ -677,7 +677,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, SA_BONUS, &i, &used, &spilling); if (used > SPA_MAXBLOCKSIZE) - return (EFBIG); + return (SET_ERROR(EFBIG)); VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : @@ -701,7 +701,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, &spill_used, &dummy); if (spill_used > SPA_MAXBLOCKSIZE) - return (EFBIG); + return (SET_ERROR(EFBIG)); buf_space = hdl->sa_spill->db_size - spillhdrsize; if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > @@ -861,7 +861,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) */ if (error || (error == 0 && sa_attr_count == 0)) { if (error == 0) - error = EINVAL; + error = SET_ERROR(EINVAL); goto bail; } sa_reg_count = sa_attr_count; @@ -892,7 +892,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) error = zap_lookup(os, sa->sa_reg_attr_obj, reg_attrs[i].sa_name, 8, 1, &attr_value); else - error = ENOENT; + error = SET_ERROR(ENOENT); switch (error) { case ENOENT: sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; @@ -1004,10 +1004,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, sa_attr_type_t *tb; int error; - mutex_enter(&os->os_lock); + mutex_enter(&os->os_user_ptr_lock); if (os->os_sa) { mutex_enter(&os->os_sa->sa_lock); - mutex_exit(&os->os_lock); + mutex_exit(&os->os_user_ptr_lock); tb = os->os_sa->sa_user_table; mutex_exit(&os->os_sa->sa_lock); *user_table = tb; @@ -1020,7 +1020,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, os->os_sa = sa; mutex_enter(&sa->sa_lock); - mutex_exit(&os->os_lock); + mutex_exit(&os->os_user_ptr_lock); avl_create(&sa->sa_layout_num_tree, layout_num_compare, sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, @@ -1051,7 +1051,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, */ if (error || (error == 0 && layout_count == 0)) { if (error == 0) - error = EINVAL; + error = SET_ERROR(EINVAL); goto fail; } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index d7c496cb4..0524c798c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -21,7 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. */ @@ -61,6 +62,9 @@ #include #include #include +#include +#include +#include #include #include @@ -119,10 +123,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, }; -static dsl_syncfunc_t spa_sync_version; -static dsl_syncfunc_t spa_sync_props; -static dsl_checkfunc_t spa_change_guid_check; -static dsl_syncfunc_t spa_change_guid_sync; +static void spa_sync_version(void *arg, dmu_tx_t *tx); +static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, @@ -323,10 +325,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) dsl_dataset_t *ds = NULL; dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); + dsl_pool_config_enter(dp, FTAG); if (err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &ds)) { - rw_exit(&dp->dp_config_rwlock); + dsl_pool_config_exit(dp, FTAG); break; } @@ -335,7 +337,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); - rw_exit(&dp->dp_config_rwlock); + dsl_pool_config_exit(dp, FTAG); } else { strval = NULL; intval = za.za_first_integer; @@ -400,7 +402,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) switch (prop) { case ZPROP_INVAL: if (!zpool_prop_feature(propname)) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } @@ -408,23 +410,23 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) * Sanitize the input. */ if (nvpair_type(elem) != DATA_TYPE_UINT64) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } if (nvpair_value_uint64(elem, &intval) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } if (intval != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } fname = strchr(propname, '@') + 1; if (zfeature_lookup_name(fname, NULL) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } @@ -437,7 +439,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) (intval < spa_version(spa) || intval > SPA_VERSION_BEFORE_FEATURES || has_feature)) - error = EINVAL; + error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_DELEGATION: @@ -446,7 +448,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) case ZPOOL_PROP_AUTOEXPAND: error = nvpair_value_uint64(elem, &intval); if (!error && intval > 1) - error = EINVAL; + error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_BOOTFS: @@ -456,7 +458,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) * the bootfs property cannot be set. */ if (spa_version(spa) < SPA_VERSION_BOOTFS) { - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); break; } @@ -464,7 +466,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) * Make sure the vdev config is bootable */ if (!vdev_is_bootable(spa->spa_root_vdev)) { - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); break; } @@ -488,12 +490,13 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) /* Must be ZPL and not gzip compressed. */ if (dmu_objset_type(os) != DMU_OST_ZFS) { - error = ENOTSUP; - } else if ((error = dsl_prop_get_integer(strval, + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), - &compress, NULL)) == 0 && + &compress)) == 0 && !BOOTFS_COMPRESS_VALID(compress)) { - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } else { objnum = dmu_objset_id(os); } @@ -505,7 +508,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = nvpair_value_uint64(elem, &intval); if (!error && (intval < ZIO_FAILURE_MODE_WAIT || intval > ZIO_FAILURE_MODE_PANIC)) - error = EINVAL; + error = SET_ERROR(EINVAL); /* * This is a special case which only occurs when @@ -519,7 +522,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) */ if (!error && spa_suspended(spa)) { spa->spa_failmode = intval; - error = EIO; + error = SET_ERROR(EIO); } break; @@ -534,7 +537,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) break; if (strval[0] != '/') { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } @@ -543,7 +546,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) - error = EINVAL; + error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_COMMENT: @@ -557,7 +560,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) * there is an easy-to-use kernel isprint(). */ if (*check >= 0x7f) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } check++; @@ -568,12 +571,12 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); else error = nvpair_value_uint64(elem, &intval); if (error == 0 && intval != 0 && intval < ZIO_DEDUPDITTO_MIN) - error = EINVAL; + error = SET_ERROR(EINVAL); break; } @@ -658,8 +661,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) * read object, the features for write object, or the * feature descriptions object. */ - error = dsl_sync_task_do(spa_get_dsl(spa), NULL, - spa_sync_version, spa, &ver, 6); + error = dsl_sync_task(spa->spa_name, NULL, + spa_sync_version, &ver, 6); if (error) return (error); continue; @@ -670,8 +673,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) } if (need_sync) { - return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, - spa, nvp, 6)); + return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, + nvp, 6)); } return (0); @@ -693,10 +696,10 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) /*ARGSUSED*/ static int -spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) +spa_change_guid_check(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - uint64_t *newguid = arg2; + uint64_t *newguid = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; @@ -705,7 +708,7 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) spa_config_exit(spa, SCL_STATE, FTAG); if (vdev_state != VDEV_STATE_HEALTHY) - return (ENXIO); + return (SET_ERROR(ENXIO)); ASSERT3U(spa_guid(spa), !=, *newguid); @@ -713,10 +716,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) +spa_change_guid_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - uint64_t *newguid = arg2; + uint64_t *newguid = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; uint64_t oldguid; vdev_t *rvd = spa->spa_root_vdev; @@ -728,17 +731,8 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); -#ifdef __FreeBSD__ - /* - * TODO: until recent illumos logging changes are merged - * log reguid as pool property change - */ - spa_history_log_internal(LOG_POOL_PROPSET, spa, tx, - "guid change old=%llu new=%llu", oldguid, *newguid); -#else - spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld", + spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", oldguid, *newguid); -#endif } /* @@ -759,8 +753,8 @@ spa_change_guid(spa_t *spa) mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); - error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check, - spa_change_guid_sync, spa, &guid, 5); + error = dsl_sync_task(spa->spa_name, spa_change_guid_check, + spa_change_guid_sync, &guid, 5); if (error == 0) { spa_config_sync(spa, B_FALSE, B_TRUE); @@ -1122,7 +1116,7 @@ spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, if (error) { vdev_free(*vdp); *vdp = NULL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } for (int c = 0; c < children; c++) { @@ -1515,7 +1509,8 @@ spa_check_removed(vdev_t *vd) for (int c = 0; c < vd->vdev_children; c++) spa_check_removed(vd->vdev_child[c]); - if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { + if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && + !vd->vdev_ishole) { zfs_post_autoreplace(vd->vdev_spa, vd); spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); } @@ -1646,21 +1641,22 @@ spa_config_valid(spa_t *spa, nvlist_t *config) /* * Check for missing log devices */ -static int +static boolean_t spa_check_logs(spa_t *spa) { + boolean_t rv = B_FALSE; + switch (spa->spa_log_state) { case SPA_LOG_MISSING: /* need to recheck in case slog has been restored */ case SPA_LOG_UNKNOWN: - if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, - DS_FIND_CHILDREN)) { + rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain, + NULL, DS_FIND_CHILDREN) != 0); + if (rv) spa_set_log_state(spa, SPA_LOG_MISSING); - return (1); - } break; } - return (0); + return (rv); } static boolean_t @@ -1706,11 +1702,11 @@ spa_activate_log(spa_t *spa) int spa_offline_log(spa_t *spa) { - int error = 0; - - if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline, - NULL, DS_FIND_CHILDREN)) == 0) { + int error; + error = dmu_objset_find(spa_name(spa), zil_vdev_offline, + NULL, DS_FIND_CHILDREN); + if (error == 0) { /* * We successfully offlined the log device, sync out the * current txg so that the "stubby" block can be removed @@ -1831,7 +1827,7 @@ spa_load_verify(spa_t *spa) if (error) { if (error != ENXIO && error != EIO) - error = EIO; + error = SET_ERROR(EIO); return (error); } @@ -1959,7 +1955,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, nvlist_t *nvl; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) - return (EINVAL); + return (SET_ERROR(EINVAL)); ASSERT(spa->spa_comment == NULL); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) @@ -1978,7 +1974,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0)) { - error = EEXIST; + error = SET_ERROR(EEXIST); } else { spa->spa_config_guid = pool_guid; @@ -2044,7 +2040,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa->spa_load_state = state; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) - return (EINVAL); + return (SET_ERROR(EINVAL)); parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); @@ -2104,7 +2100,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, return (error); if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) - return (ENXIO); + return (SET_ERROR(ENXIO)); } /* @@ -2339,7 +2335,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, "See: http://illumos.org/msg/ZFS-8000-EY", spa_name(spa), hostname, (unsigned long)hostid); - return (EBADF); + return (SET_ERROR(EBADF)); } } if (nvlist_lookup_nvlist(spa->spa_config, @@ -2528,7 +2524,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, * more toplevel vdevs are faulted. */ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) - return (ENXIO); + return (SET_ERROR(ENXIO)); if (spa_check_logs(spa)) { *ereport = FM_EREPORT_ZFS_LOG_REPLAY; @@ -2625,6 +2621,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, vdev_resilver_needed(rvd, NULL, NULL)) spa_async_request(spa, SPA_ASYNC_RESILVER); + /* + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). + */ + spa_history_log_version(spa, "open"); + /* * Delete any inconsistent datasets. */ @@ -2785,7 +2787,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, if ((spa = spa_lookup(pool)) == NULL) { if (locked) mutex_exit(&spa_namespace_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } if (spa->spa_state == POOL_STATE_UNINITIALIZED) { @@ -2820,7 +2822,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } if (error) { @@ -3157,14 +3159,14 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, return (0); if (ndev == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * Make sure the pool is formatted with a version that supports this * device type. */ if (spa_version(spa) < version) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); /* * Set the pending device list so we correctly handle device in-use @@ -3180,7 +3182,7 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, if (!vd->vdev_ops->vdev_op_leaf) { vdev_free(vd); - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -3191,7 +3193,7 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, #ifdef _KERNEL if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { - error = ENOTBLK; + error = SET_ERROR(ENOTBLK); vdev_free(vd); goto out; } @@ -3310,7 +3312,7 @@ spa_l2cache_drop(spa_t *spa) */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - const char *history_str, nvlist_t *zplprops) + nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; @@ -3330,7 +3332,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, mutex_enter(&spa_namespace_lock); if (spa_lookup(pool) != NULL) { mutex_exit(&spa_namespace_lock); - return (EEXIST); + return (SET_ERROR(EEXIST)); } /* @@ -3383,7 +3385,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, ASSERT(error != 0 || spa->spa_root_vdev == rvd); if (error == 0 && !zfs_allocatable_devs(nvroot)) - error = EINVAL; + error = SET_ERROR(EINVAL); if (error == 0 && (error = vdev_create(rvd, txg, B_FALSE)) == 0 && @@ -3513,7 +3515,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); - spa_sync_props(spa, props, tx); + spa_sync_props(props, tx); } dmu_tx_commit(tx); @@ -3529,9 +3531,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_sync(spa, B_FALSE, B_TRUE); - if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) - (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); - spa_history_log_version(spa, LOG_POOL_CREATE); + spa_history_log_version(spa, "create"); spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3658,7 +3658,7 @@ spa_import_rootpool(char *devpath, char *devid) if (config == NULL) { cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", devpath); - return (EIO); + return (SET_ERROR(EIO)); } VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, @@ -3701,7 +3701,7 @@ spa_import_rootpool(char *devpath, char *devid) if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", (u_longlong_t)guid); - error = ENOENT; + error = SET_ERROR(ENOENT); goto out; } @@ -3713,7 +3713,7 @@ spa_import_rootpool(char *devpath, char *devid) if (avd != bvd) { cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " "try booting from '%s'", avd->vdev_path); - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -3727,12 +3727,11 @@ spa_import_rootpool(char *devpath, char *devid) "try booting from '%s'", bvd->vdev_parent-> vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } error = 0; - spa_history_log_version(spa, LOG_POOL_IMPORT); out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); @@ -3963,7 +3962,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) mutex_enter(&spa_namespace_lock); if (spa_lookup(pool) != NULL) { mutex_exit(&spa_namespace_lock); - return (EEXIST); + return (SET_ERROR(EEXIST)); } /* @@ -3989,7 +3988,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_config_sync(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); return (0); } @@ -4120,7 +4119,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); #ifdef __FreeBSD__ #ifdef _KERNEL @@ -4241,12 +4240,12 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, *oldconfig = NULL; if (!(spa_mode_global & FWRITE)) - return (EROFS); + return (SET_ERROR(EROFS)); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pool)) == NULL) { mutex_exit(&spa_namespace_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } /* @@ -4280,7 +4279,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, new_state != POOL_STATE_UNINITIALIZED)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); - return (EBUSY); + return (SET_ERROR(EBUSY)); } /* @@ -4293,7 +4292,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, spa_has_active_shared_spare(spa)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); - return (EXDEV); + return (SET_ERROR(EXDEV)); } /* @@ -4663,7 +4662,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, + spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? "replace" : "attach", newvdpath, @@ -4880,7 +4879,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) error = spa_vdev_exit(spa, vd, txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, + spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); @@ -5001,7 +5000,7 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, spa->spa_root_vdev->vdev_child[c]->vdev_islog) { continue; } else { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } } @@ -5009,14 +5008,14 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, /* which disk is going to be split? */ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, &glist[c]) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } /* look it up in the spa */ vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); if (vml[c] == NULL) { - error = ENODEV; + error = SET_ERROR(ENODEV); break; } @@ -5030,12 +5029,12 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, vml[c]->vdev_children != 0 || vml[c]->vdev_state != VDEV_STATE_HEALTHY || c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { - error = EINVAL; + error = SET_ERROR(EINVAL); break; } if (vdev_dtl_required(vml[c])) { - error = EBUSY; + error = SET_ERROR(EBUSY); break; } @@ -5156,9 +5155,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) - spa_history_log_internal(LOG_POOL_VDEV_DETACH, - spa, tx, "vdev=%s", - vml[c]->vdev_path); + spa_history_log_internal(spa, "detach", tx, + "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } @@ -5173,8 +5171,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ - spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, - "split new pool %s from pool %s", newname, spa_name(spa)); + spa_history_log_internal(newspa, "split", NULL, + "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); @@ -5271,7 +5269,7 @@ spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) if (vd->vdev_stat.vs_alloc != 0) error = spa_offline_log(spa); } else { - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } if (error) @@ -5380,7 +5378,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } else { - error = EBUSY; + error = SET_ERROR(EBUSY); } } else if (spa->spa_l2cache.sav_vdevs != NULL && nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, @@ -5440,12 +5438,12 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) /* * Normal vdevs cannot be removed (yet). */ - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } else { /* * There is no vdev of any kind with the specified guid. */ - error = ENOENT; + error = SET_ERROR(ENOENT); } if (!locked) @@ -5632,7 +5630,7 @@ spa_scan_stop(spa_t *spa) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) - return (EBUSY); + return (SET_ERROR(EBUSY)); return (dsl_scan_cancel(spa->spa_dsl_pool)); } @@ -5642,7 +5640,7 @@ spa_scan(spa_t *spa, pool_scan_func_t func) ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); /* * If a resilver was requested, but there is no DTL on a @@ -5761,8 +5759,7 @@ spa_async_thread(void *arg) * then log an internal history event. */ if (new_space != old_space) { - spa_history_log_internal(LOG_POOL_VDEV_ONLINE, - spa, NULL, + spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } @@ -5991,10 +5988,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) } static void -spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) +spa_sync_version(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - uint64_t version = *(uint64_t *)arg2; + uint64_t *versionp = arg; + uint64_t version = *versionp; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; /* * Setting the version is special cased when first creating the pool. @@ -6006,17 +6004,18 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* * Set zpool properties. */ static void -spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) +spa_sync_props(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; + nvlist_t *nvp = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; - nvlist_t *nvp = arg2; nvpair_t *elem = NULL; mutex_enter(&spa->spa_props_lock); @@ -6040,6 +6039,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "set", tx, + "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: @@ -6079,6 +6080,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); break; default: /* @@ -6101,7 +6104,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx) == 0); - + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -6113,6 +6117,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); + spa_history_log_internal(spa, "set", tx, + "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } @@ -6141,13 +6147,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) } } - /* log internal history if this is not a zpool create */ - if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && - tx->tx_txg != TXG_INITIAL) { - spa_history_log_internal(LOG_POOL_PROPSET, - spa, tx, "%s %lld %s", - nvpair_name(elem), intval, spa_name(spa)); - } } mutex_exit(&spa->spa_props_lock); @@ -6167,6 +6166,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) ASSERT(spa->spa_sync_pass == 1); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { dsl_pool_create_origin(dp, tx); @@ -6192,6 +6193,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { spa_feature_create_zap_objects(spa, tx); } + rrw_exit(&dp->dp_config_rwlock, FTAG); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c index 3dae0ba1e..f313c5b03 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c @@ -220,7 +220,15 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) */ nvl = NULL; while ((spa = spa_next(spa)) != NULL) { - if (spa == target && removing) + /* + * Skip over our own pool if we're about to remove + * ourselves from the spa namespace or any pool that + * is readonly. Since we cannot guarantee that a + * readonly pool would successfully import upon reboot, + * we don't allow them to be written to the cache file. + */ + if ((spa == target && removing) || + !spa_writeable(spa)) continue; mutex_enter(&spa->spa_props_lock); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c index 282140b3b..9152846d6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -175,7 +176,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count) if (*count == 0) { zap_cursor_fini(&zc); - return (ENOMEM); + return (SET_ERROR(ENOMEM)); } name_to_bookmark(za.za_name, &zb); @@ -183,7 +184,7 @@ process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count) if (copyout(&zb, (char *)addr + (*count - 1) * sizeof (zbookmark_t), sizeof (zbookmark_t)) != 0) - return (EFAULT); + return (SET_ERROR(EFAULT)); *count -= 1; } @@ -201,12 +202,12 @@ process_error_list(avl_tree_t *list, void *addr, size_t *count) for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { if (*count == 0) - return (ENOMEM); + return (SET_ERROR(ENOMEM)); if (copyout(&se->se_bookmark, (char *)addr + (*count - 1) * sizeof (zbookmark_t), sizeof (zbookmark_t)) != 0) - return (EFAULT); + return (SET_ERROR(EFAULT)); *count -= 1; } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c index 5cd5a9dc7..f6e02770e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -30,8 +30,11 @@ #include #include #include +#include +#include #include #include +#include #include "zfs_comutil.h" #ifdef _KERNEL #include @@ -176,14 +179,14 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, } static char * -spa_history_zone() +spa_history_zone(void) { #ifdef _KERNEL /* XXX: pr_hostname can be changed by default from within a jail! */ if (jailed(curthread->td_ucred)) return (curthread->td_ucred->cr_prison->pr_hostname); #endif - return ("global"); + return (NULL); } /* @@ -191,17 +194,15 @@ spa_history_zone() */ /*ARGSUSED*/ static void -spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) +spa_history_log_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - history_arg_t *hap = arg2; - const char *history_str = hap->ha_history_str; + nvlist_t *nvl = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; - nvlist_t *nvrecord; char *record_packed = NULL; int ret; @@ -218,7 +219,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) * Get the offset of where we need to write via the bonus buffer. * Update the offset when the write completes. */ - VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); + VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); shpp = dbp->db_data; dmu_buf_will_dirty(dbp, tx); @@ -231,46 +232,35 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) } #endif - VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, - gethrestime_sec()) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); - if (hap->ha_zone != NULL) - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, - hap->ha_zone) == 0); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); #ifdef _KERNEL - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, - utsname.nodename) == 0); + fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); #endif - if (hap->ha_log_type == LOG_CMD_POOL_CREATE || - hap->ha_log_type == LOG_CMD_NORMAL) { - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, - history_str) == 0); - - zfs_dbgmsg("command: %s", history_str); - } else { - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, - hap->ha_event) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, - tx->tx_txg) == 0); - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, - history_str) == 0); - - zfs_dbgmsg("internal %s pool:%s txg:%llu %s", - zfs_history_event_names[hap->ha_event], spa_name(spa), - (longlong_t)tx->tx_txg, history_str); - + if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + zfs_dbgmsg("command: %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { + if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { + zfs_dbgmsg("txg %lld %s %s (id %llu) %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } else { + zfs_dbgmsg("txg %lld %s %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } + } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { + zfs_dbgmsg("ioctl %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); } - VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); - record_packed = kmem_alloc(reclen, KM_SLEEP); - - VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, - NV_ENCODE_XDR, KM_SLEEP) == 0); + record_packed = fnvlist_pack(nvl, &reclen); mutex_enter(&spa->spa_history_lock); - if (hap->ha_log_type == LOG_CMD_POOL_CREATE) - VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); @@ -278,36 +268,45 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); - if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { - shpp->sh_pool_create_len += sizeof (le_len) + reclen; - shpp->sh_bof = shpp->sh_pool_create_len; + /* The first command is the create, which we keep forever */ + if (ret == 0 && shpp->sh_pool_create_len == 0 && + nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; } mutex_exit(&spa->spa_history_lock); - nvlist_free(nvrecord); - kmem_free(record_packed, reclen); + fnvlist_pack_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); - - strfree(hap->ha_history_str); - if (hap->ha_zone != NULL) - strfree(hap->ha_zone); - kmem_free(hap, sizeof (history_arg_t)); + fnvlist_free(nvl); } /* * Write out a history event. */ int -spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +spa_history_log(spa_t *spa, const char *msg) +{ + int err; + nvlist_t *nvl = fnvlist_alloc(); + + fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); + err = spa_history_log_nvl(spa, nvl); + fnvlist_free(nvl); + return (err); +} + +int +spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { - history_arg_t *ha; int err = 0; dmu_tx_t *tx; + nvlist_t *nvarg; - ASSERT(what != LOG_INTERNAL); + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) + return (EINVAL); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) - return (EINVAL); + return (SET_ERROR(EINVAL)); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); @@ -316,19 +315,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) return (err); } - ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); - ha->ha_history_str = strdup(history_str); - ha->ha_zone = strdup(spa_history_zone()); - ha->ha_log_type = what; - ha->ha_uid = crgetuid(CRED()); + nvarg = fnvlist_dup(nvl); + if (spa_history_zone() != NULL) { + fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, + spa_history_zone()); + } + fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ - dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, + nvarg, 0, tx); dmu_tx_commit(tx); - /* spa_history_log_sync will free ha and strings */ + /* spa_history_log_sync will free nvl */ return (err); + } /* @@ -345,11 +346,11 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) int err; /* - * If the command history doesn't exist (older pool), + * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) - return (ENOENT); + return (SET_ERROR(ENOENT)); /* * The history is logged asynchronously, so when they request @@ -428,11 +429,14 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) return (err); } +/* + * The nvlist will be consumed by this call. + */ static void -log_internal(history_internal_events_t event, spa_t *spa, +log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { - history_arg_t *ha; + char *msg; va_list adx2; /* @@ -440,35 +444,34 @@ log_internal(history_internal_events_t event, spa_t *spa, * initialized yet, so don't bother logging the internal events. * Likewise if the pool is not writeable. */ - if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) + if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { + fnvlist_free(nvl); return; + } va_copy(adx2, adx); - ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); - ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx2) + 1, - KM_SLEEP); + msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP); + (void) vsprintf(msg, fmt, adx2); + fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); + strfree(msg); va_end(adx2); - (void) vsprintf(ha->ha_history_str, fmt, adx); - - ha->ha_log_type = LOG_INTERNAL; - ha->ha_event = event; - ha->ha_zone = NULL; - ha->ha_uid = 0; + fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, ha, tx); + spa_history_log_sync(nvl, tx); } else { - dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + dsl_sync_task_nowait(spa_get_dsl(spa), + spa_history_log_sync, nvl, 0, tx); } - /* spa_history_log_sync() will free ha and strings */ + /* spa_history_log_sync() will free nvl */ } void -spa_history_log_internal(history_internal_events_t event, spa_t *spa, +spa_history_log_internal(spa_t *spa, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; @@ -484,7 +487,7 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } va_start(adx, fmt); - log_internal(event, spa, htx, fmt, adx); + log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -493,23 +496,50 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } void -spa_history_log_version(spa_t *spa, history_internal_events_t event) +spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl = fnvlist_alloc(); + + ASSERT(tx != NULL); + + dsl_dataset_name(ds, namebuf); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); + + va_start(adx, fmt); + log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) { -#ifdef _KERNEL - uint64_t current_vers = spa_version(spa); - - if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { - spa_history_log_internal(event, spa, NULL, - "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, - utsname.nodename, utsname.release, utsname.version, - utsname.machine); - } -#if 0 - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", - event == LOG_POOL_IMPORT ? "imported" : - event == LOG_POOL_CREATE ? "created" : "accessed", - (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); -#endif -#endif + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl = fnvlist_alloc(); + + ASSERT(tx != NULL); + + dsl_dir_name(dd, namebuf); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, + dd->dd_phys->dd_head_dataset_obj); + + va_start(adx, fmt); + log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_version(spa_t *spa, const char *operation) +{ + spa_history_log_internal(spa, operation, NULL, + "pool version %llu; software version %llu/%d; uts %s %s %s %s", + (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c index dc6468280..489b93c7d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2013 Martin Matuska . All rights reserved. */ @@ -238,8 +238,8 @@ kmem_cache_t *spa_buffer_pool; int spa_mode_global; #ifdef ZFS_DEBUG -/* Everything except dprintf is on by default in debug builds */ -int zfs_flags = ~ZFS_DEBUG_DPRINTF; +/* Everything except dprintf and spa is on by default in debug builds */ +int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA); #else int zfs_flags = 0; #endif @@ -314,7 +314,7 @@ spa_config_lock_init(spa_t *spa) spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); - refcount_create(&scl->scl_count); + refcount_create_untracked(&scl->scl_count); scl->scl_writer = NULL; scl->scl_write_wanted = 0; } @@ -367,6 +367,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) { int wlocks_held = 0; + ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY); + for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (scl->scl_writer == curthread) @@ -445,27 +447,22 @@ spa_lookup(const char *name) static spa_t search; /* spa_t is large; don't allocate on stack */ spa_t *spa; avl_index_t where; - char c; char *cp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); + (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); + /* * If it's a full dataset name, figure out the pool name and * just use that. */ - cp = strpbrk(name, "/@"); - if (cp) { - c = *cp; + cp = strpbrk(search.spa_name, "/@"); + if (cp != NULL) *cp = '\0'; - } - (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); spa = avl_find(&spa_namespace_avl, &search, &where); - if (cp) - *cp = c; - return (spa); } @@ -600,6 +597,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) KM_SLEEP) == 0); } + spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0); + return (spa); } @@ -1862,7 +1861,7 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) - return (ENOENT); + return (SET_ERROR(ENOENT)); bzero(ps, sizeof (pool_scan_stat_t)); /* data stored on disk */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c index 5db6973b2..137e29c39 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c @@ -102,7 +102,7 @@ void space_map_add(space_map_t *sm, uint64_t start, uint64_t size) { avl_index_t where; - space_seg_t ssearch, *ss_before, *ss_after, *ss; + space_seg_t *ss_before, *ss_after, *ss; uint64_t end = start + size; int merge_before, merge_after; @@ -115,11 +115,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); - ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, &where); - - if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { + ss = space_map_find(sm, start, size, &where); + if (ss != NULL) { zfs_panic_recover("zfs: allocating allocated segment" "(offset=%llu size=%llu)\n", (longlong_t)start, (longlong_t)size); @@ -170,19 +167,19 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) { - space_seg_t ssearch, *ss, *newseg; +#ifdef illumos + avl_index_t where; +#endif + space_seg_t *ss, *newseg; uint64_t end = start + size; int left_over, right_over; - ASSERT(MUTEX_HELD(sm->sm_lock)); VERIFY(!sm->sm_condensing); - VERIFY(size != 0); - VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); - VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); - - ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, NULL); +#ifdef illumos + ss = space_map_find(sm, start, size, &where); +#else + ss = space_map_find(sm, start, size, NULL); +#endif /* Make sure we completely overlap with someone */ if (ss == NULL) { @@ -225,12 +222,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) sm->sm_space -= size; } -boolean_t -space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) +space_seg_t * +space_map_find(space_map_t *sm, uint64_t start, uint64_t size, + avl_index_t *wherep) { - avl_index_t where; space_seg_t ssearch, *ss; - uint64_t end = start + size; ASSERT(MUTEX_HELD(sm->sm_lock)); VERIFY(size != 0); @@ -238,10 +234,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, &where); + ssearch.ss_end = start + size; + ss = avl_find(&sm->sm_root, &ssearch, wherep); + + if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size) + return (ss); + return (NULL); +} + +boolean_t +space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) +{ + avl_index_t where; - return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); + return (space_map_find(sm, start, size, &where) != 0); } void diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h index deb57f26e..f43e8087c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h @@ -89,7 +89,7 @@ arc_buf_t *arc_loan_buf(spa_t *spa, int size); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_add_ref(arc_buf_t *buf, void *tag); -int arc_buf_remove_ref(arc_buf_t *buf, void *tag); +boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag); int arc_buf_size(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h index 8591f1585..a29f7b3cc 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h @@ -311,20 +311,17 @@ void dbuf_fini(void); boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); -#define DBUF_IS_METADATA(_db) \ - (dbuf_is_metadata(_db)) - #define DBUF_GET_BUFC_TYPE(_db) \ - (DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) + (dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) #define DBUF_IS_CACHEABLE(_db) \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ - (DBUF_IS_METADATA(_db) && \ + (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) #define DBUF_IS_L2CACHEABLE(_db) \ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \ - (DBUF_IS_METADATA(_db) && \ + (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) #ifdef ZFS_DEBUG diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h index 4d36b465a..b67ff2d5a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -216,15 +217,10 @@ typedef enum dmu_object_type { DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; -typedef enum dmu_objset_type { - DMU_OST_NONE, - DMU_OST_META, - DMU_OST_ZFS, - DMU_OST_ZVOL, - DMU_OST_OTHER, /* For testing only! */ - DMU_OST_ANY, /* Be careful! */ - DMU_OST_NUMTYPES -} dmu_objset_type_t; +typedef enum txg_how { + TXG_WAIT = 1, + TXG_NOWAIT, +} txg_how_t; void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); @@ -264,22 +260,21 @@ void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_evict_dbufs(objset_t *os); +void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, - uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname, +int dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname, struct nvlist *snaps); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); -int dmu_objset_rename(const char *name, const char *newname, - boolean_t recursive); -int dmu_objset_find(const char *name, int func(const char *, void *), void *arg, +int dmu_objset_clone(const char *name, const char *origin); +int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, + struct nvlist *errlist); +int dmu_objset_snapshot_one(const char *fsname, const char *snapname); +int dmu_objset_snapshot_tmp(const char *, const char *, int); +int dmu_objset_find(char *name, int func(const char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); +int dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive); typedef struct dmu_buf { uint64_t db_object; /* object that this buffer is part of */ @@ -554,7 +549,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); void dmu_tx_abort(dmu_tx_t *tx); -int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); @@ -796,38 +791,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, uint64_t object, uint64_t offset, int len); void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); - -int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - int outfd, struct file *fp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, - boolean_t fromorigin, uint64_t *sizep); - -typedef struct dmu_recv_cookie { - /* - * This structure is opaque! - * - * If logical and real are different, we are recving the stream - * into the "real" temporary clone, and then switching it with - * the "logical" target. - */ - struct dsl_dataset *drc_logical_ds; - struct dsl_dataset *drc_real_ds; - struct drr_begin *drc_drrb; - char *drc_tosnap; - char *drc_top_ds; - boolean_t drc_newfs; - boolean_t drc_force; - struct avl_tree *drc_guid_to_ds_map; -} dmu_recv_cookie_t; - -int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, - boolean_t force, objset_t *origin, dmu_recv_cookie_t *); -int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, - int cleanup_fd, uint64_t *action_handlep); -int dmu_recv_end(dmu_recv_cookie_t *drc); - -int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, - offset_t *off); +int dmu_diff(const char *tosnap_name, const char *fromsnap_name, + struct file *fp, offset_t *offp); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h index fac0f9f2b..143e594d1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -42,6 +43,7 @@ extern "C" { extern krwlock_t os_lock; +struct dsl_pool; struct dsl_dataset; struct dmu_tx; @@ -113,8 +115,6 @@ struct objset { /* stuff we store for the user */ kmutex_t os_user_ptr_lock; void *os_user_ptr; - - /* SA layout/attribute registration */ sa_os_t *os_sa; }; @@ -137,25 +137,16 @@ void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, - uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); void dmu_objset_stats(objset_t *os, nvlist_t *nv); void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dmu_objset_fsid_guid(objset_t *os); -int dmu_objset_find(const char *name, int func(const char *, void *), void *arg, - int flags); -int dmu_objset_find_spa(spa_t *spa, const char *name, - int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); +int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj, + int func(struct dsl_pool *, struct dsl_dataset *, void *), + void *arg, int flags); int dmu_objset_prefetch(const char *name, void *arg); -void dmu_objset_byteswap(void *buf, size_t size); -int dmu_objset_evict_dbufs(objset_t *os); +void dmu_objset_evict_dbufs(objset_t *os); timestruc_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ @@ -171,6 +162,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); boolean_t dmu_objset_userused_enabled(objset_t *os); int dmu_objset_userspace_upgrade(objset_t *os); boolean_t dmu_objset_userspace_present(objset_t *os); +int dmu_fsname(const char *snapname, char *buf); void dmu_objset_init(void); void dmu_objset_fini(void); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h new file mode 100644 index 000000000..8656e0875 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _DMU_SEND_H +#define _DMU_SEND_H + +#include + +struct vnode; +struct dsl_dataset; +struct drr_begin; +struct avl_tree; + +int dmu_send(const char *tosnap, const char *fromsnap, int outfd, +#ifdef illumos + struct vnode *vp, offset_t *off); +#else + struct file *fp, offset_t *off); +#endif +int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, + uint64_t *sizep); +int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, +#ifdef illumos + int outfd, struct vnode *vp, offset_t *off); +#else + int outfd, struct file *fp, offset_t *off); +#endif + +typedef struct dmu_recv_cookie { + struct dsl_dataset *drc_ds; + struct drr_begin *drc_drrb; + const char *drc_tofs; + const char *drc_tosnap; + boolean_t drc_newfs; + boolean_t drc_byteswap; + boolean_t drc_force; + struct avl_tree *drc_guid_to_ds_map; + zio_cksum_t drc_cksum; + uint64_t drc_newsnapobj; +} dmu_recv_cookie_t; + +int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, + boolean_t force, char *origin, dmu_recv_cookie_t *drc); +#ifdef illumos +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, +#else +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, +#endif + int cleanup_fd, uint64_t *action_handlep); +int dmu_recv_end(dmu_recv_cookie_t *drc); + +#endif /* _DMU_SEND_H */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h index bbc66347d..1f16e3c92 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_tx.h @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_DMU_TX_H #define _SYS_DMU_TX_H @@ -107,10 +110,11 @@ typedef struct dmu_tx_callback { * These routines are defined in dmu.h, and are called by the user. */ dmu_tx_t *dmu_tx_create(objset_t *dd); -int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how); void dmu_tx_commit(dmu_tx_t *tx); void dmu_tx_abort(dmu_tx_t *tx); uint64_t dmu_tx_get_txg(dmu_tx_t *tx); +struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h index d030cd779..6729f9f05 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h @@ -20,8 +20,6 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek . - * All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -37,6 +35,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -50,10 +49,8 @@ struct dsl_pool; #define DS_IS_INCONSISTENT(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) /* - * NB: nopromote can not yet be set, but we want support for it in this - * on-disk version, so that we don't need to upgrade for it later. It - * will be needed when we implement 'zfs split' (where the split off - * clone should not be promoted). + * Note: nopromote can not yet be set, but we want support for it in this + * on-disk version, so that we don't need to upgrade for it later. */ #define DS_FLAG_NOPROMOTE (1ULL<<1) @@ -78,6 +75,8 @@ struct dsl_pool; */ #define DS_FLAG_CI_DATASET (1ULL<<16) +#define DS_CREATE_FLAG_NODIRTY (1ULL<<24) + typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ @@ -127,9 +126,6 @@ typedef struct dsl_dataset { dsl_deadlist_t ds_deadlist; bplist_t ds_pending_deadlist; - /* to protect against multiple concurrent incremental recv */ - kmutex_t ds_recvlock; - /* protected by lock on pool's dp_dirty_datasets list */ txg_node_t ds_dirty_link; list_node_t ds_synced_link; @@ -141,13 +137,15 @@ typedef struct dsl_dataset { kmutex_t ds_lock; objset_t *ds_objset; uint64_t ds_userrefs; + void *ds_owner; /* - * ds_owner is protected by the ds_rwlock and the ds_lock + * Long holds prevent the ds from being destroyed; they allow the + * ds to remain held even after dropping the dp_config_rwlock. + * Owning counts as a long hold. See the comments above + * dsl_pool_hold() for details. */ - krwlock_t ds_rwlock; - kcondvar_t ds_exclusive_cv; - void *ds_owner; + refcount_t ds_longholds; /* no locking; only for making guesses */ uint64_t ds_trysnap_txg; @@ -165,83 +163,44 @@ typedef struct dsl_dataset { char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; -struct dsl_ds_destroyarg { - dsl_dataset_t *ds; /* ds to destroy */ - dsl_dataset_t *rm_origin; /* also remove our origin? */ - boolean_t is_origin_rm; /* set if removing origin snap */ - boolean_t defer; /* destroy -d requested? */ - boolean_t releasing; /* destroying due to release? */ - boolean_t need_prep; /* do we need to retry due to EBUSY? */ -}; - /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. */ #define MAX_TAG_PREFIX_LEN 17 -struct dsl_ds_holdarg { - dsl_sync_task_group_t *dstg; - char *htag; - char *snapname; - boolean_t recursive; - boolean_t gotone; - boolean_t temphold; - char failed[MAXPATHLEN]; -}; - -/* - * Flags for dsl_dataset_rename(). - */ -#define ZFS_RENAME_RECURSIVE 0x01 -#define ZFS_RENAME_ALLOW_MOUNTED 0x02 - #define dsl_dataset_is_snapshot(ds) \ ((ds)->ds_phys->ds_num_children != 0) #define DS_UNIQUE_IS_ACCURATE(ds) \ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) -int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); -int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **); -int dsl_dataset_own(const char *name, boolean_t inconsistentok, +int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, + dsl_dataset_t **dsp); +int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **); +void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +int dsl_dataset_own(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); -void dsl_dataset_name(dsl_dataset_t *ds, char *name); -void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); + void *tag, dsl_dataset_t **dsp); void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); -void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); -boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, - void *tag); -void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); +void dsl_dataset_name(dsl_dataset_t *ds, char *name); +boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); -int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); -int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); -dsl_checkfunc_t dsl_dataset_destroy_check; -dsl_syncfunc_t dsl_dataset_destroy_sync; -dsl_checkfunc_t dsl_dataset_snapshot_check; -dsl_syncfunc_t dsl_dataset_snapshot_sync; -dsl_syncfunc_t dsl_dataset_user_hold_sync; -int dsl_dataset_rename(char *name, const char *newname, int flags); +int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force); -int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold, int cleanup_fd); -int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, - boolean_t temphold); -int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, - boolean_t recursive); -int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, - char *htag, boolean_t retry); -int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); +int dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive); +int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, + minor_t cleanup_minor, const char *htag); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); @@ -278,13 +237,35 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv); -int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, +int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, uint64_t quota); -dsl_syncfunc_t dsl_dataset_set_quota_sync; -int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, +int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, uint64_t reservation); -int dsl_destroy_inconsistent(const char *dsname, void *arg); +boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier); +void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag); +void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag); +boolean_t dsl_dataset_long_held(dsl_dataset_t *ds); + +int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, boolean_t force); +void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, dmu_tx_t *tx); +int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx); +void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx); + +void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, + dmu_tx_t *tx); +void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds); +int dsl_dataset_get_snapname(dsl_dataset_t *ds); +int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, + uint64_t *value); +int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx); +void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, + zprop_source_t source, uint64_t value, dmu_tx_t *tx); +int dsl_dataset_rollback(const char *fsname); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h index 9db6d07e8..5842639aa 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_deleg.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DELEG_H @@ -65,8 +65,7 @@ extern "C" { int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, - const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_destroy.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_destroy.h new file mode 100644 index 000000000..c5a70bb90 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_destroy.h @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_DESTROY_H +#define _SYS_DSL_DESTROY_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct nvlist; +struct dsl_dataset; +struct dmu_tx; + +int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, + struct nvlist *errlist); +int dsl_destroy_snapshot(const char *name, boolean_t defer); +int dsl_destroy_head(const char *name); +int dsl_destroy_head_check_impl(struct dsl_dataset *ds, int expected_holds); +void dsl_destroy_head_sync_impl(struct dsl_dataset *ds, struct dmu_tx *tx); +int dsl_destroy_inconsistent(const char *dsname, void *arg); +void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *ds, + boolean_t defer, struct dmu_tx *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DESTROY_H */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h index 9f2ddc6fc..641bcfcdd 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dir.h @@ -20,8 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek . - * All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DIR_H @@ -103,18 +102,15 @@ struct dsl_dir { char dd_myname[MAXNAMELEN]; }; -void dsl_dir_close(dsl_dir_t *dd, void *tag); -int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail); -int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **, - const char **tailp); -int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, +void dsl_dir_rele(dsl_dir_t *dd, void *tag); +int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, + dsl_dir_t **, const char **tail); +int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **); void dsl_dir_name(dsl_dir_t *dd, char *buf); int dsl_dir_namelen(dsl_dir_t *dd); uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx); -dsl_checkfunc_t dsl_dir_destroy_check; -dsl_syncfunc_t dsl_dir_destroy_sync; void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv); uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly); @@ -133,14 +129,15 @@ int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota); int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation); -int dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags); +int dsl_dir_rename(const char *oldname, const char *newname); int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); -int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx); boolean_t dsl_dir_is_clone(dsl_dir_t *dd); void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, uint64_t reservation, cred_t *cr, dmu_tx_t *tx); void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); +void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, + dmu_tx_t *tx); /* internal reserved dir name */ #define MOS_DIR_NAME "$MOS" diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h index ab1229a2e..b0160edfb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -113,7 +114,7 @@ typedef struct dsl_pool { * syncing context does not need to ever have it for read, since * nobody else could possibly have it for write. */ - krwlock_t dp_config_rwlock; + rrwlock_t dp_config_rwlock; zfs_all_blkstats_t *dp_blkstats; } dsl_pool_t; @@ -139,15 +140,20 @@ void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_mos_diduse_space(dsl_pool_t *dp, int64_t used, int64_t comp, int64_t uncomp); +void dsl_pool_config_enter(dsl_pool_t *dp, void *tag); +void dsl_pool_config_exit(dsl_pool_t *dp, void *tag); +boolean_t dsl_pool_config_held(dsl_pool_t *dp); taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp); -extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, - const char *tag, uint64_t *now, dmu_tx_t *tx); -extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, +int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, + const char *tag, uint64_t now, dmu_tx_t *tx); +int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx); -extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); +void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **); +int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp); +void dsl_pool_rele(dsl_pool_t *dp, void *tag); #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h index a636ad350..5fe18d6a7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_prop.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_PROP_H @@ -53,60 +54,47 @@ typedef struct dsl_props_arg { zprop_source_t pa_source; } dsl_props_arg_t; -typedef struct dsl_prop_set_arg { - const char *psa_name; - zprop_source_t psa_source; - int psa_intsz; - int psa_numints; - const void *psa_value; - - /* - * Used to handle the special requirements of the quota and reservation - * properties. - */ - uint64_t psa_effective_value; -} dsl_prop_setarg_t; - int dsl_prop_register(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); -int dsl_prop_numcb(struct dsl_dataset *ds); +void dsl_prop_notify_all(struct dsl_dir *dd); +boolean_t dsl_prop_hascb(struct dsl_dataset *ds); int dsl_prop_get(const char *ddname, const char *propname, int intsz, int numints, void *buf, char *setpoint); int dsl_prop_get_integer(const char *ddname, const char *propname, uint64_t *valuep, char *setpoint); int dsl_prop_get_all(objset_t *os, nvlist_t **nvp); -int dsl_prop_get_received(objset_t *os, nvlist_t **nvp); +int dsl_prop_get_received(const char *dsname, nvlist_t **nvp); int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname, int intsz, int numints, void *buf, char *setpoint); +int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname, + uint64_t *valuep); int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot); -dsl_syncfunc_t dsl_props_set_sync; -int dsl_prop_set(const char *ddname, const char *propname, - zprop_source_t source, int intsz, int numints, const void *buf); -int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); -void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, +void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source, + nvlist_t *props, dmu_tx_t *tx); +void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname, + zprop_source_t source, int intsz, int numints, const void *value, dmu_tx_t *tx); +int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); +int dsl_prop_set_int(const char *dsname, const char *propname, + zprop_source_t source, uint64_t value); +int dsl_prop_set_string(const char *dsname, const char *propname, + zprop_source_t source, const char *value); +int dsl_prop_inherit(const char *dsname, const char *propname, + zprop_source_t source); -void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, - zprop_source_t source, uint64_t *value); -int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa); -#ifdef ZFS_DEBUG -void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa); -#define DSL_PROP_CHECK_PREDICTION(dd, psa) \ - dsl_prop_check_prediction((dd), (psa)) -#else -#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */ -#endif +int dsl_prop_predict(dsl_dir_t *dd, const char *propname, + zprop_source_t source, uint64_t value, uint64_t *newvalp); /* flag first receive on or after SPA_VERSION_RECVD_PROPS */ -boolean_t dsl_prop_get_hasrecvd(objset_t *os); -void dsl_prop_set_hasrecvd(objset_t *os); -void dsl_prop_unset_hasrecvd(objset_t *os); +boolean_t dsl_prop_get_hasrecvd(const char *dsname); +int dsl_prop_set_hasrecvd(const char *dsname); +void dsl_prop_unset_hasrecvd(const char *dsname); void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value); void dsl_prop_nvlist_add_string(nvlist_t *nv, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h index 9126290cd..ef86fb64c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_synctask.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_SYNCTASK_H @@ -34,43 +35,26 @@ extern "C" { struct dsl_pool; -typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *); -typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *); +typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *); +typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *); typedef struct dsl_sync_task { - list_node_t dst_node; + txg_node_t dst_node; + struct dsl_pool *dst_pool; + uint64_t dst_txg; + int dst_space; dsl_checkfunc_t *dst_checkfunc; dsl_syncfunc_t *dst_syncfunc; - void *dst_arg1; - void *dst_arg2; - int dst_err; + void *dst_arg; + int dst_error; + boolean_t dst_nowaiter; } dsl_sync_task_t; -typedef struct dsl_sync_task_group { - txg_node_t dstg_node; - list_t dstg_tasks; - struct dsl_pool *dstg_pool; - uint64_t dstg_txg; - int dstg_err; - int dstg_space; - boolean_t dstg_nowaiter; -} dsl_sync_task_group_t; - -dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp); -void dsl_sync_task_create(dsl_sync_task_group_t *dstg, - dsl_checkfunc_t *, dsl_syncfunc_t *, - void *arg1, void *arg2, int blocks_modified); -int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg); -void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); -void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg); -void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); - -int dsl_sync_task_do(struct dsl_pool *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified); -void dsl_sync_task_do_nowait(struct dsl_pool *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx); +void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx); +int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified); +void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc, + void *arg, int blocks_modified, dmu_tx_t *tx); #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h new file mode 100644 index 000000000..56c6c8f47 --- /dev/null +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h @@ -0,0 +1,57 @@ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_USERHOLD_H +#define _SYS_DSL_USERHOLD_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_pool; +struct dsl_dataset; +struct dmu_tx; + +int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, + nvlist_t *errlist); +int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist); +int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl); +void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, + const char *htag); +int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag, + boolean_t temphold, struct dmu_tx *tx); +void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag, + minor_t minor, uint64_t now, struct dmu_tx *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_USERHOLD_H */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h index 2cf4d2b48..d6c0bf4c9 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -56,6 +56,7 @@ extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now); extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); +extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp); extern metaslab_class_t *metaslab_class_create(spa_t *spa, space_map_ops_t *ops); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h index 75e8e0052..ee927c1c1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h @@ -53,15 +53,17 @@ typedef struct reference { typedef struct refcount { kmutex_t rc_mtx; + boolean_t rc_tracked; list_t rc_list; list_t rc_removed; uint64_t rc_count; uint64_t rc_removed_count; } refcount_t; -/* Note: refcount_t must be initialized with refcount_create() */ +/* Note: refcount_t must be initialized with refcount_create[_untracked]() */ void refcount_create(refcount_t *rc); +void refcount_create_untracked(refcount_t *rc); void refcount_destroy(refcount_t *rc); void refcount_destroy_many(refcount_t *rc, uint64_t number); int refcount_is_zero(refcount_t *rc); @@ -82,6 +84,7 @@ typedef struct refcount { } refcount_t; #define refcount_create(rc) ((rc)->rc_count = 0) +#define refcount_create_untracked(rc) ((rc)->rc_count = 0) #define refcount_destroy(rc) ((rc)->rc_count = 0) #define refcount_destroy_many(rc, number) ((rc)->rc_count = 0) #define refcount_is_zero(rc) ((rc)->rc_count == 0) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h index 760fc822d..fb44fbe18 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/rrwlock.h @@ -22,12 +22,13 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_RR_RW_LOCK_H #define _SYS_RR_RW_LOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -56,6 +57,7 @@ typedef struct rrwlock { refcount_t rr_anon_rcount; refcount_t rr_linked_rcount; boolean_t rr_writer_wanted; + boolean_t rr_track_all; } rrwlock_t; /* @@ -63,14 +65,19 @@ typedef struct rrwlock { * 'tag' must be the same in a rrw_enter() as in its * corresponding rrw_exit(). */ -void rrw_init(rrwlock_t *rrl); +void rrw_init(rrwlock_t *rrl, boolean_t track_all); void rrw_destroy(rrwlock_t *rrl); void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); +void rrw_enter_read(rrwlock_t *rrl, void *tag); +void rrw_enter_write(rrwlock_t *rrl); void rrw_exit(rrwlock_t *rrl, void *tag); boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); +void rrw_tsd_destroy(void *arg); #define RRW_READ_HELD(x) rrw_held(x, RW_READER) #define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) +#define RRW_LOCK_HELD(x) \ + (rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER)) #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h index de4bf4e44..a905cb4a8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h @@ -52,6 +52,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t; typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; +struct dsl_dataset; /* * General-purpose 32-bit and 64-bit bitfield encodings. @@ -418,7 +419,7 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - const char *history_str, nvlist_t *zplprops); + nvlist_t *zplprops); #if defined(sun) extern int spa_import_rootpool(char *devpath, char *devid); #else @@ -630,31 +631,20 @@ extern int spa_mode(spa_t *spa); extern uint64_t zfs_strtonum(const char *str, char **nptr); #define strtonum(str, nptr) zfs_strtonum((str), (nptr)) -/* history logging */ -typedef enum history_log_type { - LOG_CMD_POOL_CREATE, - LOG_CMD_NORMAL, - LOG_INTERNAL -} history_log_type_t; - -typedef struct history_arg { - char *ha_history_str; - history_log_type_t ha_log_type; - history_internal_events_t ha_event; - char *ha_zone; - uid_t ha_uid; -} history_arg_t; - extern char *spa_his_ievent_table[]; extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, char *his_buf); -extern int spa_history_log(spa_t *spa, const char *his_buf, - history_log_type_t what); -extern void spa_history_log_internal(history_internal_events_t event, - spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); +extern int spa_history_log(spa_t *spa, const char *his_buf); +extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl); +extern void spa_history_log_version(spa_t *spa, const char *operation); +extern void spa_history_log_internal(spa_t *spa, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); /* error handling */ struct zbookmark; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h index 2da50fb7b..64223daf6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/space_map.h @@ -149,6 +149,8 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size); extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size); extern boolean_t space_map_contains(space_map_t *sm, uint64_t start, uint64_t size); +extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start, + uint64_t size, avl_index_t *wherep); extern void space_map_swap(space_map_t **msrc, space_map_t **mdest); extern void space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h index 1287f09c7..2df33f0fb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h @@ -45,9 +45,6 @@ extern "C" { /* Number of txgs worth of frees we defer adding to in-core spacemaps */ #define TXG_DEFER_SIZE 2 -#define TXG_WAIT 1ULL -#define TXG_NOWAIT 2ULL - typedef struct tx_cpu tx_cpu_t; typedef struct txg_handle { @@ -119,11 +116,11 @@ extern boolean_t txg_sync_waiting(struct dsl_pool *dp); extern void txg_list_create(txg_list_t *tl, size_t offset); extern void txg_list_destroy(txg_list_t *tl); extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg); -extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); -extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg); -extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_head(txg_list_t *tl, uint64_t txg); extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h index d21fd52cf..9825d06ae 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -104,8 +104,7 @@ struct vdev_queue { avl_tree_t vq_read_tree; avl_tree_t vq_write_tree; avl_tree_t vq_pending_tree; - uint64_t vq_io_complete_ts; - uint64_t vq_io_delta_ts; + hrtime_t vq_io_complete_ts; kmutex_t vq_lock; }; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h index 481e85b1b..1a081e422 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfeature.h @@ -26,7 +26,6 @@ #ifndef _SYS_ZFEATURE_H #define _SYS_ZFEATURE_H -#include #include #include "zfeature_common.h" @@ -34,14 +33,18 @@ extern "C" { #endif -extern boolean_t feature_is_supported(objset_t *os, uint64_t obj, +struct spa; +struct dmu_tx; +struct objset; + +extern boolean_t feature_is_supported(struct objset *os, uint64_t obj, uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat); -struct spa; -extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *); -extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *); +extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *); +extern void spa_feature_enable(struct spa *, zfeature_info_t *, + struct dmu_tx *); +extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *); +extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *); extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *); extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h index 43d037d7b..7907fa597 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h @@ -50,11 +50,13 @@ extern "C" { extern int zfs_flags; -#define ZFS_DEBUG_DPRINTF 0x0001 -#define ZFS_DEBUG_DBUF_VERIFY 0x0002 -#define ZFS_DEBUG_DNODE_VERIFY 0x0004 -#define ZFS_DEBUG_SNAPNAMES 0x0008 -#define ZFS_DEBUG_MODIFY 0x0010 +#define ZFS_DEBUG_DPRINTF (1<<0) +#define ZFS_DEBUG_DBUF_VERIFY (1<<1) +#define ZFS_DEBUG_DNODE_VERIFY (1<<2) +#define ZFS_DEBUG_SNAPNAMES (1<<3) +#define ZFS_DEBUG_MODIFY (1<<4) +#define ZFS_DEBUG_SPA (1<<5) +#define ZFS_DEBUG_ZIO_FREE (1<<6) #ifdef ZFS_DEBUG extern void __dprintf(const char *file, const char *func, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h index 5c333f7bd..b27f11502 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H @@ -40,6 +41,15 @@ extern "C" { #endif +/* + * The structures in this file are passed between userland and the + * kernel. Userland may be running a 32-bit process, while the kernel + * is 64-bit. Therefore, these structures need to compile the same in + * 32-bit and 64-bit. This means not using type "long", and adding + * explicit padding so that the 32-bit structure will not be packed more + * tightly than the 64-bit structure (which requires 64-bit alignment). + */ + /* * Property values for snapdir */ @@ -284,22 +294,28 @@ typedef enum zfs_case { } zfs_case_t; typedef struct zfs_cmd { - char zc_name[MAXPATHLEN]; + char zc_name[MAXPATHLEN]; /* name of pool or dataset */ + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ + int zc_pad2; + + /* + * The following members are for legacy ioctls which haven't been + * converted to the new method. + */ + uint64_t zc_history; /* really (char *) */ char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; - char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; uint64_t zc_objset_type; uint64_t zc_perm_action; - uint64_t zc_history; /* really (char *) */ - uint64_t zc_history_len; + uint64_t zc_history_len; uint64_t zc_history_offset; uint64_t zc_obj; uint64_t zc_iflags; /* internal to zfs(7fs) */ @@ -344,7 +360,8 @@ extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); extern int zfs_busy(void); -extern int zfs_unmount_snap(const char *, void *); +extern void zfs_unmount_snap(const char *); +extern void zfs_destroy_unmount_origin(const char *); /* * ZFS minor numbers can refer to either a control device instance or diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h index 6281f8e2a..0014135c0 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_FS_ZFS_ZNODE_H @@ -259,7 +260,7 @@ VTOZ(vnode_t *vp) */ #define ZFS_ENTER(zfsvfs) \ { \ - rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \ + rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \ if ((zfsvfs)->z_unmounted) { \ ZFS_EXIT(zfsvfs); \ return (EIO); \ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h index e52c65bb7..a212e4f0e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h @@ -411,8 +411,8 @@ extern int zil_check_log_chain(const char *osname, void *txarg); extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); extern void zil_clean(zilog_t *zilog, uint64_t synced_txg); -extern int zil_suspend(zilog_t *zilog); -extern void zil_resume(zilog_t *zilog); +extern int zil_suspend(const char *osname, void **cookiep); +extern void zil_resume(void *cookie); extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp); extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h index d6ae38ff9..13c901ead 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h @@ -408,7 +408,7 @@ struct zio { uint64_t io_offset; uint64_t io_deadline; - uint64_t io_timestamp; + hrtime_t io_timestamp; avl_node_t io_offset_node; avl_node_t io_deadline_node; avl_tree_t *io_vdev_tree; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c index ce0579cff..7be37dbf5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c @@ -585,6 +585,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg) { tx_state_t *tx = &dp->dp_tx; + ASSERT(!dsl_pool_config_held(dp)); + mutex_enter(&tx->tx_sync_lock); ASSERT(tx->tx_threads == 2); if (txg == 0) @@ -608,6 +610,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg) { tx_state_t *tx = &dp->dp_tx; + ASSERT(!dsl_pool_config_held(dp)); + mutex_enter(&tx->tx_sync_lock); ASSERT(tx->tx_threads == 2); if (txg == 0) @@ -673,42 +677,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg) } /* - * Add an entry to the list. - * Returns 0 if it's a new entry, 1 if it's already there. + * Add an entry to the list (unless it's already on the list). + * Returns B_TRUE if it was actually added. */ -int +boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - int already_on_list; + boolean_t add; mutex_enter(&tl->tl_lock); - already_on_list = tn->tn_member[t]; - if (!already_on_list) { + add = (tn->tn_member[t] == 0); + if (add) { tn->tn_member[t] = 1; tn->tn_next[t] = tl->tl_head[t]; tl->tl_head[t] = tn; } mutex_exit(&tl->tl_lock); - return (already_on_list); + return (add); } /* - * Add an entry to the end of the list (walks list to find end). - * Returns 0 if it's a new entry, 1 if it's already there. + * Add an entry to the end of the list, unless it's already on the list. + * (walks list to find end) + * Returns B_TRUE if it was actually added. */ -int +boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - int already_on_list; + boolean_t add; mutex_enter(&tl->tl_lock); - already_on_list = tn->tn_member[t]; - if (!already_on_list) { + add = (tn->tn_member[t] == 0); + if (add) { txg_node_t **tp; for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t]) @@ -720,7 +725,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) } mutex_exit(&tl->tl_lock); - return (already_on_list); + return (add); } /* @@ -771,13 +776,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg) return (NULL); } -int +boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - return (tn->tn_member[t]); + return (tn->tn_member[t] != 0); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c index 692cda137..a07dc00ae 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -33,7 +34,7 @@ uberblock_verify(uberblock_t *ub) byteswap_uint64_array(ub, sizeof (uberblock_t)); if (ub->ub_magic != UBERBLOCK_MAGIC) - return (EINVAL); + return (SET_ERROR(EINVAL)); return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 03f8a11df..631254cb7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2013 Martin Matuska . All rights reserved. */ @@ -356,10 +356,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); if ((ops = vdev_getops(type)) == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * If this is a load, get the vdev guid from the nvlist. @@ -370,26 +370,26 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || label_id != id) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); } else if (alloctype == VDEV_ALLOC_SPARE) { if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); } else if (alloctype == VDEV_ALLOC_L2CACHE) { if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); } else if (alloctype == VDEV_ALLOC_ROOTPOOL) { if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* * The first allocated vdev must be of type 'root'. */ if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * Determine whether we're a log vdev. @@ -397,10 +397,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, islog = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog); if (islog && spa_version(spa) < SPA_VERSION_SLOGS) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); /* * Set the nparity property for RAID-Z vdevs. @@ -410,24 +410,24 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) == 0) { if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * Previous versions could only support 1 or 2 parity * device. */ if (nparity > 1 && spa_version(spa) < SPA_VERSION_RAIDZ2) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (nparity > 2 && spa_version(spa) < SPA_VERSION_RAIDZ3) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } else { /* * We require the parity to be specified for SPAs that * support multiple parity levels. */ if (spa_version(spa) >= SPA_VERSION_RAIDZ2) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * Otherwise, we default to 1 parity device for RAID-Z. */ @@ -945,7 +945,7 @@ vdev_probe_done(zio_t *zio) ASSERT(zio->io_error != 0); zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, spa, vd, NULL, 0, 0); - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); } mutex_enter(&vd->vdev_probe_lock); @@ -955,7 +955,7 @@ vdev_probe_done(zio_t *zio) while ((pio = zio_walk_parents(zio)) != NULL) if (!vdev_accessible(vd, pio)) - pio->io_error = ENXIO; + pio->io_error = SET_ERROR(ENXIO); kmem_free(vps, sizeof (*vps)); } @@ -1140,11 +1140,11 @@ vdev_open(vdev_t *vd) vd->vdev_label_aux == VDEV_AUX_EXTERNAL); vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, vd->vdev_label_aux); - return (ENXIO); + return (SET_ERROR(ENXIO)); } else if (vd->vdev_offline) { ASSERT(vd->vdev_children == 0); vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); - return (ENXIO); + return (SET_ERROR(ENXIO)); } error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); @@ -1179,7 +1179,7 @@ vdev_open(vdev_t *vd) vd->vdev_label_aux == VDEV_AUX_EXTERNAL); vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, vd->vdev_label_aux); - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (vd->vdev_degraded) { @@ -1211,7 +1211,7 @@ vdev_open(vdev_t *vd) if (osize < SPA_MINDEVSIZE) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_TOO_SMALL); - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); } psize = osize; asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); @@ -1222,7 +1222,7 @@ vdev_open(vdev_t *vd) (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_TOO_SMALL); - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); } psize = 0; asize = osize; @@ -1237,7 +1237,7 @@ vdev_open(vdev_t *vd) if (asize < vd->vdev_min_asize) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (vd->vdev_asize == 0) { @@ -1319,7 +1319,7 @@ vdev_validate(vdev_t *vd, boolean_t strict) for (int c = 0; c < vd->vdev_children; c++) if (vdev_validate(vd->vdev_child[c], strict) != 0) - return (EBADF); + return (SET_ERROR(EBADF)); /* * If the device has already failed, or was marked offline, don't do @@ -1405,7 +1405,7 @@ vdev_validate(vdev_t *vd, boolean_t strict) if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) && spa_load_state(spa) == SPA_LOAD_OPEN && state != POOL_STATE_ACTIVE) - return (EBADF); + return (SET_ERROR(EBADF)); /* * If we were able to open and validate a vdev that was @@ -3191,10 +3191,10 @@ vdev_deadman(vdev_t *vd) * the spa_deadman_synctime we panic the system. */ fio = avl_first(&vq->vq_pending_tree); - delta = ddi_get_lbolt64() - fio->io_timestamp; - if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) { - zfs_dbgmsg("SLOW IO: zio timestamp %llu, " - "delta %llu, last io %llu", + delta = gethrtime() - fio->io_timestamp; + if (delta > spa_deadman_synctime(spa)) { + zfs_dbgmsg("SLOW IO: zio timestamp %lluns, " + "delta %lluns, last io %lluns", fio->io_timestamp, delta, vq->vq_io_complete_ts); fm_panic("I/O to pool '%s' appears to be " diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c index ef9d4b5dc..1cc5bf973 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ #include #include @@ -271,16 +274,16 @@ vdev_cache_read(zio_t *zio) ASSERT(zio->io_type == ZIO_TYPE_READ); if (zio->io_flags & ZIO_FLAG_DONT_CACHE) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (zio->io_size > zfs_vdev_cache_max) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); /* * If the I/O straddles two or more cache blocks, don't cache it. */ if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) - return (EXDEV); + return (SET_ERROR(EXDEV)); ASSERT(cache_phase + zio->io_size <= VCBS); @@ -292,7 +295,7 @@ vdev_cache_read(zio_t *zio) if (ve != NULL) { if (ve->ve_missed_update) { mutex_exit(&vc->vc_lock); - return (ESTALE); + return (SET_ERROR(ESTALE)); } if ((fio = ve->ve_fill_io) != NULL) { @@ -315,7 +318,7 @@ vdev_cache_read(zio_t *zio) if (ve == NULL) { mutex_exit(&vc->vc_lock); - return (ENOMEM); + return (SET_ERROR(ENOMEM)); } fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c index 759f0f84f..ed77f544f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -144,13 +144,15 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, int error; dev_t dev; int otyp; + boolean_t validate_devid = B_FALSE; + ddi_devid_t devid; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -185,14 +187,13 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } } error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { - ddi_devid_t devid; if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; @@ -221,7 +222,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { - error = EINVAL; + error = SET_ERROR(EINVAL); (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; @@ -241,9 +242,10 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * If we were unable to open by path, or the devid check fails, open by * devid instead. */ - if (error != 0 && vd->vdev_devid != NULL) + if (error != 0 && vd->vdev_devid != NULL) { error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); + } /* * If all else fails, then try opening by physical path (if available) @@ -252,6 +254,9 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * level vdev validation will prevent us from opening the wrong device. */ if (error) { + if (vd->vdev_devid != NULL) + validate_devid = B_TRUE; + if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), @@ -272,6 +277,25 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, return (error); } + /* + * Now that the device has been successfully opened, update the devid + * if necessary. + */ + if (validate_devid && spa_writeable(spa) && + ldi_get_devid(dvd->vd_lh, &devid) == 0) { + if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { + char *vd_devid; + + vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor); + zfs_dbgmsg("vdev %s: update devid from %s, " + "to %s", vd->vdev_path, vd->vdev_devid, vd_devid); + spa_strfree(vd->vdev_devid); + vd->vdev_devid = spa_strdup(vd_devid); + ddi_devid_str_free(vd_devid); + } + ddi_devid_free(devid); + } + /* * Once a device is opened, verify that the physical device path (if * available) is up to date. @@ -303,7 +327,7 @@ skip_open: */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -374,7 +398,7 @@ vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, int error = 0; if (vd_lh == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); ASSERT(flags & B_READ || flags & B_WRITE); @@ -388,7 +412,7 @@ vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size, error = ldi_strategy(vd_lh, bp); ASSERT(error == 0); if ((error = biowait(bp)) == 0 && bp->b_resid != 0) - error = EIO; + error = SET_ERROR(EIO); freerbuf(bp); return (error); @@ -408,7 +432,7 @@ vdev_disk_io_intr(buf_t *bp) zio->io_error = (geterror(bp) != 0 ? EIO : 0); if (zio->io_error == 0 && bp->b_resid != 0) - zio->io_error = EIO; + zio->io_error = SET_ERROR(EIO); kmem_free(vdb, sizeof (vdev_disk_buf_t)); @@ -449,7 +473,7 @@ vdev_disk_io_start(zio_t *zio) if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) { - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); return (ZIO_PIPELINE_CONTINUE); } @@ -461,7 +485,7 @@ vdev_disk_io_start(zio_t *zio) break; if (vd->vdev_nowritecache) { - zio->io_error = ENOTSUP; + zio->io_error = SET_ERROR(ENOTSUP); break; } @@ -499,7 +523,7 @@ vdev_disk_io_start(zio_t *zio) break; default: - zio->io_error = ENOTSUP; + zio->io_error = SET_ERROR(ENOTSUP); } return (ZIO_PIPELINE_CONTINUE); @@ -604,7 +628,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) if (ldi_get_size(vd_lh, &s)) { (void) ldi_close(vd_lh, FREAD, kcred); - return (EIO); + return (SET_ERROR(EIO)); } size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); @@ -646,7 +670,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) kmem_free(label, sizeof (vdev_label_t)); (void) ldi_close(vd_lh, FREAD, kcred); if (*config == NULL) - error = EIDRM; + error = SET_ERROR(EIDRM); return (error); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c index f03a9234c..cab5b44bd 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -61,7 +61,7 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -101,13 +101,17 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * Make sure it's a regular file. */ if (vp->v_type != VREG) { +#ifdef __FreeBSD__ (void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); +#endif vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; +#ifdef __FreeBSD__ kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); vd->vdev_tsd = NULL; - return (ENODEV); - } #endif + return (SET_ERROR(ENODEV)); + } +#endif /* _KERNEL */ skip_open: /* @@ -160,7 +164,7 @@ vdev_file_io_start(zio_t *zio) ssize_t resid; if (!vdev_readable(vd)) { - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); return (ZIO_PIPELINE_CONTINUE); } @@ -174,7 +178,7 @@ vdev_file_io_start(zio_t *zio) kcred, NULL); break; default: - zio->io_error = ENOTSUP; + zio->io_error = SET_ERROR(ENOTSUP); } return (ZIO_PIPELINE_CONTINUE); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c index e573feb5d..5ee7c7d15 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -663,14 +663,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * Dead vdevs cannot be initialized. */ if (vdev_is_dead(vd)) - return (EIO); + return (SET_ERROR(EIO)); /* * Determine if the vdev is in use. */ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT && vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) - return (EBUSY); + return (SET_ERROR(EBUSY)); /* * If this is a request to add or replace a spare or l2cache device @@ -1078,7 +1078,7 @@ vdev_label_sync_top_done(zio_t *zio) uint64_t *good_writes = zio->io_private; if (*good_writes == 0) - zio->io_error = EIO; + zio->io_error = SET_ERROR(EIO); kmem_free(good_writes, sizeof (uint64_t)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c index a28ca3e39..fa3bc02a2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -139,7 +139,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, if (vd->vdev_children == 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } vdev_open_children(vd); @@ -234,14 +234,14 @@ vdev_mirror_child_select(zio_t *zio) if (mc->mc_tried || mc->mc_skipped) continue; if (!vdev_readable(mc->mc_vd)) { - mc->mc_error = ENXIO; + mc->mc_error = SET_ERROR(ENXIO); mc->mc_tried = 1; /* don't even try */ mc->mc_skipped = 1; continue; } if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) return (c); - mc->mc_error = ESTALE; + mc->mc_error = SET_ERROR(ESTALE); mc->mc_skipped = 1; mc->mc_speculative = 1; } @@ -426,7 +426,7 @@ vdev_mirror_io_done(zio_t *zio) !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, zio->io_txg, 1)) continue; - mc->mc_error = ESTALE; + mc->mc_error = SET_ERROR(ESTALE); } zio_nowait(zio_vdev_child_io(zio, zio->io_bp, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c index 3bd8c90e0..b9eb99d18 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -69,7 +69,7 @@ vdev_missing_close(vdev_t *vd) static int vdev_missing_io_start(zio_t *zio) { - zio->io_error = ENOTSUP; + zio->io_error = SET_ERROR(ENOTSUP); return (ZIO_PIPELINE_CONTINUE); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c index 8bde2ebcf..4f84312e1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c @@ -44,8 +44,11 @@ int zfs_vdev_max_pending = 10; int zfs_vdev_min_pending = 4; -/* deadline = pri + ddi_get_lbolt64() >> time_shift) */ -int zfs_vdev_time_shift = 6; +/* + * The deadlines are grouped into buckets based on zfs_vdev_time_shift: + * deadline = pri + gethrtime() >> time_shift) + */ +int zfs_vdev_time_shift = 29; /* each bucket is 0.537 seconds */ /* exponential I/O issue ramp-up rate */ int zfs_vdev_ramp_rate = 2; @@ -391,7 +394,7 @@ vdev_queue_io(zio_t *zio) mutex_enter(&vq->vq_lock); - zio->io_timestamp = ddi_get_lbolt64(); + zio->io_timestamp = gethrtime(); zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) + zio->io_priority; @@ -424,8 +427,7 @@ vdev_queue_io_done(zio_t *zio) avl_remove(&vq->vq_pending_tree, zio); - vq->vq_io_complete_ts = ddi_get_lbolt64(); - vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp; + vq->vq_io_complete_ts = gethrtime(); for (int i = 0; i < zfs_vdev_ramp_rate; i++) { zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c index 5c3675340..1d6cac0b6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -1457,7 +1457,7 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, if (nparity > VDEV_RAIDZ_MAXPARITY || vd->vdev_children < nparity + 1) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } vdev_open_children(vd); @@ -1582,7 +1582,7 @@ vdev_raidz_io_start(zio_t *zio) rm->rm_missingdata++; else rm->rm_missingparity++; - rc->rc_error = ENXIO; + rc->rc_error = SET_ERROR(ENXIO); rc->rc_tried = 1; /* don't even try */ rc->rc_skipped = 1; continue; @@ -1592,7 +1592,7 @@ vdev_raidz_io_start(zio_t *zio) rm->rm_missingdata++; else rm->rm_missingparity++; - rc->rc_error = ESTALE; + rc->rc_error = SET_ERROR(ESTALE); rc->rc_skipped = 1; continue; } @@ -1680,7 +1680,7 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) continue; if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { raidz_checksum_error(zio, rc, orig[c]); - rc->rc_error = ECKSUM; + rc->rc_error = SET_ERROR(ECKSUM); ret++; } zio_buf_free(orig[c], rc->rc_size); @@ -1804,7 +1804,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) if (rc->rc_tried) raidz_checksum_error(zio, rc, orig[i]); - rc->rc_error = ECKSUM; + rc->rc_error = SET_ERROR(ECKSUM); } ret = code; @@ -2080,7 +2080,7 @@ vdev_raidz_io_done(zio_t *zio) * Start checksum ereports for all children which haven't * failed, and the IO wasn't speculative. */ - zio->io_error = ECKSUM; + zio->io_error = SET_ERROR(ECKSUM); if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { for (c = 0; c < rm->rm_cols; c++) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c index 1abc79d33..a5442a55e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -62,7 +62,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, if (vd->vdev_children == 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (EINVAL); + return (SET_ERROR(EINVAL)); } vdev_open_children(vd); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c index 009f0a4dd..0e637d56c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -325,7 +325,7 @@ zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) * this is already an aberrant condition. */ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2) - return (ENOSPC); + return (SET_ERROR(ENOSPC)); if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { /* @@ -714,7 +714,7 @@ static int fzap_checkname(zap_name_t *zn) { if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); return (0); } @@ -729,7 +729,7 @@ fzap_checksize(uint64_t integer_size, uint64_t num_integers) case 8: break; default: - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (integer_size * num_integers > ZAP_MAXVALUELEN) @@ -805,7 +805,7 @@ fzap_add_cd(zap_name_t *zn, retry: err = zap_leaf_lookup(l, zn, &zeh); if (err == 0) { - err = EEXIST; + err = SET_ERROR(EEXIST); goto out; } if (err != ENOENT) @@ -996,7 +996,7 @@ zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx) zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (EINVAL); + return (SET_ERROR(EINVAL)); err = zap_add(os, intoobj, za.za_name, 8, 1, &za.za_first_integer, tx); if (err) @@ -1018,7 +1018,7 @@ zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, zap_cursor_retrieve(&zc, &za) == 0; (void) zap_cursor_advance(&zc)) { if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (EINVAL); + return (SET_ERROR(EINVAL)); err = zap_add(os, intoobj, za.za_name, 8, 1, &value, tx); if (err) @@ -1042,7 +1042,7 @@ zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, uint64_t delta = 0; if (za.za_integer_length != 8 || za.za_num_integers != 1) - return (EINVAL); + return (SET_ERROR(EINVAL)); err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta); if (err != 0 && err != ENOENT) @@ -1250,7 +1250,7 @@ fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn) zap_entry_handle_t zeh; if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l); if (err != 0) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c index b867ac407..30cb7d2df 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -434,7 +435,7 @@ again: goto again; } - return (ENOENT); + return (SET_ERROR(ENOENT)); } /* Return (h1,cd1 >= h2,cd2) */ @@ -492,14 +493,14 @@ zap_entry_read(const zap_entry_handle_t *zeh, ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); if (le->le_value_intlen > integer_size) - return (EINVAL); + return (SET_ERROR(EINVAL)); zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_value_intlen, le->le_value_numints, integer_size, num_integers, buf); if (zeh->zeh_num_integers > num_integers) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); return (0); } @@ -520,7 +521,7 @@ zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, le->le_name_numints, 1, buflen, buf); } if (le->le_name_numints > buflen) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); return (0); } @@ -536,7 +537,7 @@ zap_entry_update(zap_entry_handle_t *zeh, ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) - return (EAGAIN); + return (SET_ERROR(EAGAIN)); zap_leaf_array_free(l, &le->le_value_chunk); le->le_value_chunk = @@ -626,7 +627,7 @@ zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, } if (l->l_phys->l_hdr.lh_nfree < numchunks) - return (EAGAIN); + return (SET_ERROR(EAGAIN)); /* make the entry */ chunk = zap_leaf_chunk_alloc(l); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c index 05f890b89..47356c24f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -791,7 +791,7 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zap->zap_ismicro) { @@ -800,12 +800,12 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, } else { mze = mze_find(zn); if (mze == NULL) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { if (num_integers < 1) { - err = EOVERFLOW; + err = SET_ERROR(EOVERFLOW); } else if (integer_size != 8) { - err = EINVAL; + err = SET_ERROR(EINVAL); } else { *(uint64_t *)buf = MZE_PHYS(zap, mze)->mze_value; @@ -837,7 +837,7 @@ zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } fzap_prefetch(zn); @@ -860,7 +860,7 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } err = fzap_lookup(zn, integer_size, num_integers, buf, @@ -895,14 +895,14 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zap->zap_ismicro) { err = fzap_length(zn, integer_size, num_integers); } else { mze = mze_find(zn); if (mze == NULL) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { if (integer_size) *integer_size = 8; @@ -929,7 +929,7 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } err = fzap_length(zn, integer_size, num_integers); zap_name_free(zn); @@ -998,7 +998,7 @@ zap_add(objset_t *os, uint64_t zapobj, const char *key, zn = zap_name_alloc(zap, key, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zap->zap_ismicro) { err = fzap_add(zn, integer_size, num_integers, val, tx); @@ -1012,7 +1012,7 @@ zap_add(objset_t *os, uint64_t zapobj, const char *key, } else { mze = mze_find(zn); if (mze != NULL) { - err = EEXIST; + err = SET_ERROR(EEXIST); } else { mzap_addent(zn, *intval); } @@ -1039,7 +1039,7 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ @@ -1075,7 +1075,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, zn = zap_name_alloc(zap, name, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zap->zap_ismicro) { err = fzap_update(zn, integer_size, num_integers, val, tx); @@ -1120,7 +1120,7 @@ zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } err = fzap_update(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_update() may change zap */ @@ -1151,14 +1151,14 @@ zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, zn = zap_name_alloc(zap, name, mt); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zap->zap_ismicro) { err = fzap_remove(zn, tx); } else { mze = mze_find(zn); if (mze == NULL) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { zap->zap_m.zap_num_entries--; bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], @@ -1185,7 +1185,7 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, zn = zap_name_alloc_uint64(zap, key, key_numints); if (zn == NULL) { zap_unlockdir(zap); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } err = fzap_remove(zn, tx); zap_name_free(zn); @@ -1263,7 +1263,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) mzap_ent_t *mze; if (zc->zc_hash == -1ULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); if (zc->zc_zap == NULL) { int hb; @@ -1289,8 +1289,6 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { - err = ENOENT; - mze_tofind.mze_hash = zc->zc_hash; mze_tofind.mze_cd = zc->zc_cd; @@ -1313,6 +1311,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) err = 0; } else { zc->zc_hash = -1ULL; + err = SET_ERROR(ENOENT); } } rw_exit(&zc->zc_zap->zap_rwlock); @@ -1346,7 +1345,7 @@ zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) zn = zap_name_alloc(zc->zc_zap, name, mt); if (zn == NULL) { rw_exit(&zc->zc_zap->zap_rwlock); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (!zc->zc_zap->zap_ismicro) { @@ -1354,7 +1353,7 @@ zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) } else { mze = mze_find(zn); if (mze == NULL) { - err = ENOENT; + err = SET_ERROR(ENOENT); goto out; } zc->zc_hash = mze->mze_hash; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c index d532ccc63..638b14139 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -226,13 +226,13 @@ feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj, * have been allocated yet. Act as though all features are disabled. */ if (zapobj == 0) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1, &refcount); if (err != 0) { if (err == ENOENT) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); else return (err); } @@ -273,16 +273,16 @@ feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj, break; case FEATURE_ACTION_INCR: if (error == ENOENT) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (refcount == UINT64_MAX) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); refcount++; break; case FEATURE_ACTION_DECR: if (error == ENOENT) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (refcount == 0) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); refcount--; break; default: diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c index 73219ea31..6fe7aab53 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -681,7 +682,7 @@ zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, */ if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type, aceptr->z_hdr.z_flags) != B_TRUE) - return (EINVAL); + return (SET_ERROR(EINVAL)); switch (acep->a_type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: @@ -788,7 +789,7 @@ zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep, */ if (zfs_ace_valid(obj_type, aclp, aceptr->z_type, aceptr->z_flags) != B_TRUE) - return (EINVAL); + return (SET_ERROR(EINVAL)); } *size = (caddr_t)aceptr - (caddr_t)z_acl; return (0); @@ -1122,7 +1123,7 @@ zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, zfs_acl_node_free(aclnode); /* convert checksum errors into IO errors */ if (error == ECKSUM) - error = EIO; + error = SET_ERROR(EIO); goto done; } @@ -1781,7 +1782,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); if (mask == 0) - return (ENOSYS); + return (SET_ERROR(ENOSYS)); if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)) return (error); @@ -1875,7 +1876,7 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, int error; if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); @@ -1937,10 +1938,10 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) uint64_t acl_obj; if (mask == 0) - return (ENOSYS); + return (SET_ERROR(ENOSYS)); if (zp->z_pflags & ZFS_IMMUTABLE) - return (EPERM); + return (SET_ERROR(EPERM)); if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) return (error); @@ -2037,7 +2038,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && (!IS_DEVVP(ZTOV(zp)) || (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { - return (EROFS); + return (SET_ERROR(EROFS)); } /* @@ -2048,13 +2049,13 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_IMMUTABLE)))) { - return (EPERM); + return (SET_ERROR(EPERM)); } #ifdef sun if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && (zp->z_pflags & ZFS_NOUNLINK)) { - return (EPERM); + return (SET_ERROR(EPERM)); } #else /* @@ -2070,7 +2071,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && (zp->z_pflags & ZFS_AV_QUARANTINED))) { - return (EACCES); + return (SET_ERROR(EACCES)); } return (0); @@ -2178,7 +2179,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, break; } else { mutex_exit(&zp->z_acl_lock); - return (EIO); + return (SET_ERROR(EIO)); } } @@ -2212,7 +2213,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, /* Put the found 'denies' back on the working mode */ if (deny_mask) { *working_mode |= deny_mask; - return (EACCES); + return (SET_ERROR(EACCES)); } else if (*working_mode) { return (-1); } @@ -2279,7 +2280,7 @@ zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs, cred_t *cr) { if (*working_mode != ACE_WRITE_DATA) - return (EACCES); + return (SET_ERROR(EACCES)); return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode, check_privs, B_FALSE, cr)); @@ -2295,7 +2296,7 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) int error; if (zdp->z_pflags & ZFS_AV_QUARANTINED) - return (EACCES); + return (SET_ERROR(EACCES)); is_attr = ((zdp->z_pflags & ZFS_XATTR) && (ZTOV(zdp)->v_type == VDIR)); @@ -2501,7 +2502,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) * for are still present. If so then return EACCES */ if (working_mode & ~(ZFS_CHECKED_MASKS)) { - error = EACCES; + error = SET_ERROR(EACCES); } } } else if (error == 0) { @@ -2615,7 +2616,7 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) */ if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) - return (EPERM); + return (SET_ERROR(EPERM)); /* * First row @@ -2682,7 +2683,7 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, int error; if (szp->z_pflags & ZFS_AV_QUARANTINED) - return (EACCES); + return (SET_ERROR(EACCES)); add_perm = (ZTOV(szp)->v_type == VDIR) ? ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 86561e9f6..c155e4764 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -20,8 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 Pawel Jakub Dawidek . - * All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -73,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -303,7 +303,7 @@ zfsctl_common_open(struct vop_open_args *ap) int flags = ap->a_mode; if (flags & FWRITE) - return (EACCES); + return (SET_ERROR(EACCES)); return (0); } @@ -336,11 +336,11 @@ zfsctl_common_access(ap) #ifdef TODO if (flags & V_ACE_MASK) { if (accmode & ACE_ALL_WRITE_PERMS) - return (EACCES); + return (SET_ERROR(EACCES)); } else { #endif if (accmode & VWRITE) - return (EACCES); + return (SET_ERROR(EACCES)); #ifdef TODO } #endif @@ -397,7 +397,15 @@ zfsctl_common_fid(ap) ZFS_ENTER(zfsvfs); +#ifdef illumos + if (fidp->fid_len < SHORT_FID_LEN) { + fidp->fid_len = SHORT_FID_LEN; + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOSPC)); + } +#else fidp->fid_len = SHORT_FID_LEN; +#endif zfid = (zfid_short_t *)fidp; @@ -433,7 +441,7 @@ zfsctl_shares_fid(ap) if (zfsvfs->z_shares_dir == 0) { ZFS_EXIT(zfsvfs); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { @@ -523,7 +531,7 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, * No extended attributes allowed under .zfs */ if (flags & LOOKUP_XATTR) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); @@ -631,10 +639,10 @@ zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; if (snapshot_namecheck(name, NULL, NULL) != 0) - return (EILSEQ); + return (SET_ERROR(EILSEQ)); dmu_objset_name(os, zname); if (strlen(zname) + 1 + strlen(name) >= len) - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); (void) strcat(zname, "@"); (void) strcat(zname, name); return (0); @@ -743,7 +751,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, zfsvfs_t *zfsvfs; avl_index_t where; char from[MAXNAMELEN], to[MAXNAMELEN]; - char real[MAXNAMELEN]; + char real[MAXNAMELEN], fsname[MAXNAMELEN]; int err; zfsvfs = sdvp->v_vfsp->vfs_data; @@ -762,19 +770,21 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, ZFS_EXIT(zfsvfs); + dmu_objset_name(zfsvfs->z_os, fsname); + err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from); - if (!err) + if (err == 0) err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to); - if (!err) + if (err == 0) err = zfs_secpolicy_rename_perms(from, to, cr); - if (err) + if (err != 0) return (err); /* * Cannot move snapshots out of the snapdir. */ if (sdvp != tdvp) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (strcmp(snm, tnm) == 0) return (0); @@ -784,10 +794,10 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, search.se_name = (char *)snm; if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) { mutex_exit(&sdp->sd_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } - err = dmu_objset_rename(from, to, 0); + err = dsl_dataset_rename_snapshot(fsname, snm, tnm, 0); if (err == 0) zfsctl_rename_snap(sdp, sep, tnm); @@ -829,9 +839,9 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, ZFS_EXIT(zfsvfs); err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname); - if (!err) + if (err == 0) err = zfs_secpolicy_destroy_perms(snapname, cr); - if (err) + if (err != 0) return (err); mutex_enter(&sdp->sd_lock); @@ -841,15 +851,12 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, if (sep) { avl_remove(&sdp->sd_snaps, sep); err = zfsctl_unmount_snap(sep, MS_FORCE, cr); - if (err) { - avl_index_t where; - - if (avl_find(&sdp->sd_snaps, sep, &where) == NULL) - avl_insert(&sdp->sd_snaps, sep, where); - } else - err = dmu_objset_destroy(snapname, B_FALSE); + if (err != 0) + avl_add(&sdp->sd_snaps, sep); + else + err = dsl_destroy_snapshot(snapname, B_FALSE); } else { - err = ENOENT; + err = SET_ERROR(ENOENT); } mutex_exit(&sdp->sd_lock); @@ -873,20 +880,19 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, static enum uio_seg seg = UIO_SYSSPACE; if (snapshot_namecheck(dirname, NULL, NULL) != 0) - return (EILSEQ); + return (SET_ERROR(EILSEQ)); dmu_objset_name(zfsvfs->z_os, name); *vpp = NULL; err = zfs_secpolicy_snapshot_perms(name, cr); - if (err) + if (err != 0) return (err); if (err == 0) { - err = dmu_objset_snapshot(name, dirname, NULL, NULL, - B_FALSE, B_FALSE, -1); - if (err) + err = dmu_objset_snapshot_one(name, dirname); + if (err != 0) return (err); err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); } @@ -944,7 +950,7 @@ zfsctl_snapdir_lookup(ap) * No extended attributes allowed under .zfs */ if (flags & LOOKUP_XATTR) - return (EINVAL); + return (SET_ERROR(EINVAL)); ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); @@ -959,7 +965,7 @@ zfsctl_snapdir_lookup(ap) * add some flag to domount() to tell it not to do this lookup. */ if (MUTEX_HELD(&sdp->sd_lock)) - return (ENOENT); + return (SET_ERROR(ENOENT)); ZFS_ENTER(zfsvfs); @@ -994,7 +1000,7 @@ zfsctl_snapdir_lookup(ap) *vpp = sep->se_root; VN_HOLD(*vpp); err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY); - if (err) { + if (err != 0) { VN_RELE(*vpp); *vpp = NULL; } else if (*vpp == sep->se_root) { @@ -1021,7 +1027,7 @@ zfsctl_snapdir_lookup(ap) * The requested snapshot is not currently mounted, look it up. */ err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname); - if (err) { + if (err != 0) { mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); /* @@ -1033,15 +1039,20 @@ zfsctl_snapdir_lookup(ap) } if (dmu_objset_hold(snapname, FTAG, &snap) != 0) { mutex_exit(&sdp->sd_lock); +#ifdef illumos + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOENT)); +#else /* !illumos */ /* Translate errors and add SAVENAME when needed. */ if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) { err = EJUSTRETURN; cnp->cn_flags |= SAVENAME; } else { - err = ENOENT; + err = SET_ERROR(ENOENT); } ZFS_EXIT(zfsvfs); return (err); +#endif /* !illumos */ } sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP); @@ -1074,8 +1085,20 @@ domount: } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); + +#ifdef illumos + /* + * If we had an error, drop our hold on the vnode and + * zfsctl_snapshot_inactive() will clean up. + */ + if (err != 0) { + VN_RELE(*vpp); + *vpp = NULL; + } +#else if (err != 0) *vpp = NULL; +#endif return (err); } @@ -1108,7 +1131,7 @@ zfsctl_shares_lookup(ap) if (zfsvfs->z_shares_dir == 0) { ZFS_EXIT(zfsvfs); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) error = VOP_LOOKUP(ZTOV(dzp), vpp, cnp); @@ -1133,8 +1156,10 @@ zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp, ZFS_ENTER(zfsvfs); cookie = *offp; + dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id, &cookie, &case_conflict); + dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); if (error) { ZFS_EXIT(zfsvfs); if (error == ENOENT) { @@ -1187,7 +1212,7 @@ zfsctl_shares_readdir(ap) if (zfsvfs->z_shares_dir == 0) { ZFS_EXIT(zfsvfs); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY); @@ -1195,7 +1220,7 @@ zfsctl_shares_readdir(ap) VN_URELE(ZTOV(dzp)); } else { *eofp = 1; - error = ENOENT; + error = SET_ERROR(ENOENT); } ZFS_EXIT(zfsvfs); @@ -1264,7 +1289,7 @@ zfsctl_shares_getattr(ap) ZFS_ENTER(zfsvfs); if (zfsvfs->z_shares_dir == 0) { ZFS_EXIT(zfsvfs); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY); @@ -1656,7 +1681,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) error = traverse(&vp, LK_SHARED | LK_RETRY); if (error == 0) { if (vp == sep->se_root) - error = EINVAL; + error = SET_ERROR(EINVAL); else *zfsvfsp = VTOZ(vp)->z_zfsvfs; } @@ -1666,7 +1691,7 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) else VN_RELE(vp); } else { - error = EINVAL; + error = SET_ERROR(EINVAL); mutex_exit(&sdp->sd_lock); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c index 9ef5e3962..ac1aa4240 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -154,7 +155,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) - return (EEXIST); + return (SET_ERROR(EEXIST)); /* * Case sensitivity and normalization preferences are set when @@ -225,7 +226,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, mutex_exit(&dzp->z_lock); if (!(flag & ZHAVELOCK)) rw_exit(&dzp->z_name_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, @@ -236,7 +237,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, mutex_exit(&dzp->z_lock); if (!(flag & ZHAVELOCK)) rw_exit(&dzp->z_name_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } if (dl == NULL) { size_t namesize; @@ -289,12 +290,12 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, vp = dnlc_lookup(ZTOV(dzp), name); if (vp == DNLC_NO_VNODE) { VN_RELE(vp); - error = ENOENT; + error = SET_ERROR(ENOENT); } else if (vp) { if (flag & ZNEW) { zfs_dirent_unlock(dl); VN_RELE(vp); - return (EEXIST); + return (SET_ERROR(EEXIST)); } *dlpp = dl; *zpp = VTOZ(vp); @@ -312,7 +313,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, } else { if (flag & ZNEW) { zfs_dirent_unlock(dl); - return (EEXIST); + return (SET_ERROR(EEXIST)); } error = zfs_zget(zfsvfs, zoid, zpp); if (error) { @@ -719,7 +720,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) if (zp->z_unlinked) { /* no new links to unlinked zp */ ASSERT(!(flag & (ZNEW | ZEXISTS))); mutex_exit(&zp->z_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } zp->z_links++; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, @@ -820,11 +821,11 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, if (!(flag & ZRENAMING)) { if (vn_vfswlock(vp)) /* prevent new mounts on zp */ - return (EBUSY); + return (SET_ERROR(EBUSY)); if (vn_ismntpt(vp)) { /* don't remove mount point */ vn_vfsunlock(vp); - return (EBUSY); + return (SET_ERROR(EBUSY)); } mutex_enter(&zp->z_lock); @@ -832,7 +833,11 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, if (zp_is_dir && !zfs_dirempty(zp)) { mutex_exit(&zp->z_lock); vn_vfsunlock(vp); - return (ENOTEMPTY); +#ifdef illumos + return (SET_ERROR(EEXIST)); +#else + return (SET_ERROR(ENOTEMPTY)); +#endif } /* @@ -943,7 +948,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) return (error); if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); - return (EDQUOT); + return (SET_ERROR(EDQUOT)); } top: @@ -1026,16 +1031,16 @@ top: if (!(flags & CREATE_XATTR_DIR)) { zfs_dirent_unlock(dl); -#ifdef __FreeBSD__ - return (ENOATTR); +#ifdef illumos + return (SET_ERROR(ENOENT)); #else - return (ENOENT); + return (SET_ERROR(ENOATTR)); #endif } if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { zfs_dirent_unlock(dl); - return (EROFS); + return (SET_ERROR(EROFS)); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index d2e6a18cf..483861e9d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -25,11 +25,112 @@ * All rights reserved. * Copyright 2013 Martin Matuska . All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */ +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + * The name of the ioctl. This is used for history logging. If the + * ioctl returns successfully (the callback returns 0), and allow_log + * is true, then a history log entry will be recorded with the input & + * output nvlists. The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + * The ioctl request number, which userland will pass to ioctl(2). + * The ioctl numbers can change from release to release, because + * the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + * This function will be called before the zfs_ioc_func_t, to + * determine if this operation is permitted. It should return EPERM + * on failure, and 0 on success. Checks include determining if the + * dataset is visible in this zone, and if the user has either all + * zfs privileges in the zone (SYS_MOUNT), or has been granted permission + * to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + * name, a dataset name, or nothing. If the name is not well-formed, + * the ioctl will fail and the callback will not be called. + * Therefore, the callback can assume that the name is well-formed + * (e.g. is null-terminated, doesn't have more than one '@' character, + * doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + * This specifies requirements on the pool state. If the pool does + * not meet them (is suspended or is readonly), the ioctl will fail + * and the callback will not be called. If any checks are specified + * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. + * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + * POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + * If smush_outnvlist is true, then the output is presumed to be a + * list of errors, and it will be "smushed" down to fit into the + * caller's buffer, by removing some entries and replacing them with a + * single "N_MORE_ERRORS" entry indicating how many were removed. See + * nvlist_smush() for details. If smush_outnvlist is false, and the + * outnvlist does not fit into the userland-provided buffer, then the + * ioctl will fail with ENOMEM. + * + * zfs_ioc_func_t *func + * The callback function that will perform the operation. + * + * The callback should return 0 on success, or an error number on + * failure. If the function fails, the userland ioctl will return -1, + * and errno will be set to the callback's return value. The callback + * will be called with the following arguments: + * + * const char *name + * The name of the pool or dataset to operate on, from + * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the + * expected type (pool, dataset, or none). + * + * nvlist_t *innvl + * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or + * NULL if no input nvlist was provided. Changes to this nvlist are + * ignored. If the input nvlist could not be deserialized, the + * ioctl will fail and the callback will not be called. + * + * nvlist_t *outnvl + * The output nvlist, initially empty. The callback can fill it in, + * and it will be returned to userland by serializing it into + * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization + * fails (e.g. because the caller didn't supply a large enough + * buffer), then the overall ioctl will fail. See the + * 'smush_nvlist' argument above for additional behaviors. + * + * There are two typical uses of the output nvlist: + * - To return state, e.g. property values. In this case, + * smush_outnvlist should be false. If the buffer was not large + * enough, the caller will reallocate a larger buffer and try + * the ioctl again. + * + * - To return multiple errors from an ioctl which makes on-disk + * changes. In this case, smush_outnvlist should be true. + * Ioctls which make on-disk modifications should generally not + * use the outnvl if they succeed, because the caller can not + * distinguish between the operation failing, and + * deserialization failing. + */ + #include #include #include @@ -61,6 +162,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +178,9 @@ #include #include #include +#include +#include +#include #include #include "zfs_namecheck.h" @@ -97,8 +202,13 @@ static struct cdev *zfsdev; extern void zfs_init(void); extern void zfs_fini(void); -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); typedef enum { NO_NAME, @@ -109,15 +219,18 @@ typedef enum { typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2 + POOL_CHECK_READONLY = 1 << 2, } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { + zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_his_log; + boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; + boolean_t zvec_smush_outnvlist; + const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ @@ -135,15 +248,12 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static void zfsdev_close(void *data); -static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); -static int zfs_prop_activate_feature_check(void *arg1, void *arg2, - dmu_tx_t *tx); -static void zfs_prop_activate_feature_sync(void *arg1, void *arg2, - dmu_tx_t *tx); +static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature); /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ void @@ -281,7 +391,7 @@ zfs_log_history(zfs_cmd_t *zc) if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); @@ -293,7 +403,7 @@ zfs_log_history(zfs_cmd_t *zc) */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (0); } @@ -304,13 +414,13 @@ zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (INGLOBALZONE(curthread) || zone_dataset_visible(zc->zc_name, NULL)) return (0); - return (ENOENT); + return (SET_ERROR(ENOENT)); } static int @@ -324,7 +434,7 @@ zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) */ if (!INGLOBALZONE(curthread) && !zone_dataset_visible(dataset, &writable)) - return (ENOENT); + return (SET_ERROR(ENOENT)); if (INGLOBALZONE(curthread)) { /* @@ -332,17 +442,17 @@ zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) * global zone. */ if (secpolicy_zfs(cr) && zoned) - return (EPERM); + return (SET_ERROR(EPERM)); } else { /* * If we are in a local zone, the 'zoned' property must be set. */ if (!zoned) - return (EPERM); + return (SET_ERROR(EPERM)); /* must be writable by this zone */ if (!writable) - return (EPERM); + return (SET_ERROR(EPERM)); } return (0); } @@ -353,7 +463,7 @@ zfs_dozonecheck(const char *dataset, cred_t *cr) uint64_t zoned; if (dsl_prop_get_integer(dataset, "jailed", &zoned, NULL)) - return (ENOENT); + return (SET_ERROR(ENOENT)); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } @@ -363,62 +473,48 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_prop_get_ds(ds, "jailed", 8, 1, &zoned, NULL)) { - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); - return (ENOENT); - } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + if (dsl_prop_get_int_ds(ds, "jailed", &zoned)) + return (SET_ERROR(ENOENT)); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -/* - * If name ends in a '@', then require recursive permissions. - */ -int -zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) +static int +zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, + const char *perm, cred_t *cr) { int error; - boolean_t descendent = B_FALSE; - dsl_dataset_t *ds; - char *at; - - at = strchr(name, '@'); - if (at != NULL && at[1] == '\0') { - *at = '\0'; - descendent = B_TRUE; - } - - error = dsl_dataset_hold(name, FTAG, &ds); - if (at != NULL) - *at = '@'; - if (error != 0) - return (error); error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); - if (error) - error = dsl_deleg_access_impl(ds, descendent, perm, cr); + if (error != 0) + error = dsl_deleg_access_impl(ds, perm, cr); } - - dsl_dataset_rele(ds, FTAG); return (error); } -int -zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, - const char *perm, cred_t *cr) +static int +zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; + dsl_dataset_t *ds; + dsl_pool_t *dp; - error = zfs_dozonecheck_ds(name, ds, cr); - if (error == 0) { - error = secpolicy_zfs(cr); - if (error) - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_pool_hold(name, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); } + + error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr); + + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -441,15 +537,15 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); - if (error) - return (EPERM); + if (error != 0) + return (SET_ERROR(EPERM)); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) new_default = TRUE; /* The label must be translatable */ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) - return (EINVAL); + return (SET_ERROR(EINVAL)); /* * In a non-global zone, disallow attempts to set a label that @@ -458,7 +554,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) */ if (!INGLOBALZONE(curproc)) { if (new_default || !blequal(&new_sl, CR_SL(CRED()))) - return (EPERM); + return (SET_ERROR(EPERM)); return (0); } @@ -469,10 +565,10 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) */ if (dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) - return (EPERM); + return (SET_ERROR(EPERM)); if (!zoned) { if (zfs_check_global_label(name, strval) != 0) - return (EPERM); + return (SET_ERROR(EPERM)); } /* @@ -491,8 +587,8 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) */ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, setsl_tag, &os); - if (error) - return (EPERM); + if (error != 0) + return (SET_ERROR(EPERM)); dmu_objset_disown(os, setsl_tag); @@ -502,7 +598,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) } if (hexstr_to_label(strval, &new_sl) != 0) - return (EPERM); + return (SET_ERROR(EPERM)); if (blstrictdom(&ds_sl, &new_sl)) needed_priv = PRIV_FILE_DOWNGRADE_SL; @@ -536,7 +632,7 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, * Disallow setting of 'zoned' from within a local zone. */ if (!INGLOBALZONE(curthread)) - return (EPERM); + return (SET_ERROR(EPERM)); break; case ZFS_PROP_QUOTA: @@ -550,16 +646,16 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, */ if (dsl_prop_get_integer(dsname, "jailed", &zoned, setpoint)) - return (EPERM); + return (SET_ERROR(EPERM)); if (!zoned || strlen(dsname) <= strlen(setpoint)) - return (EPERM); + return (SET_ERROR(EPERM)); } break; case ZFS_PROP_MLSLABEL: #ifdef SECLABEL if (!is_system_labeled()) - return (EPERM); + return (SET_ERROR(EPERM)); if (nvpair_value_string(propval, &strval) == 0) { int err; @@ -577,13 +673,14 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -int -zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; error = zfs_dozonecheck(zc->zc_name, cr); - if (error) + if (error != 0) return (error); /* @@ -593,17 +690,18 @@ zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) return (0); } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; char *cp; @@ -615,30 +713,38 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) */ cp = strchr(zc->zc_name, '@'); if (cp == NULL) - return (EINVAL); - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) + return (SET_ERROR(EINVAL)); + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); + } dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } +/* ARGSUSED */ +static int +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr)); +} + +/* ARGSUSED */ static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { vnode_t *vp; int error; @@ -653,7 +759,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); - return (EPERM); + return (SET_ERROR(EPERM)); } VN_RELE(vp); @@ -662,28 +768,28 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) } int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (!INGLOBALZONE(curthread)) - return (EPERM); + return (SET_ERROR(EPERM)); if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (!INGLOBALZONE(curthread)) - return (EPERM); + return (SET_ERROR(EPERM)); if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } } @@ -702,7 +808,7 @@ zfs_get_parent(const char *datasetname, char *parent, int parentsize) } else { cp = strrchr(parent, '/'); if (cp == NULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); cp[0] = '\0'; } @@ -721,30 +827,60 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions. */ +/* ARGSUSED */ static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int error; - char *dsname; + nvlist_t *snaps; + nvpair_t *pair, *nextpair; + int error = 0; - dsname = kmem_asprintf("%s@", zc->zc_name); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (SET_ERROR(EINVAL)); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nextpair) { + dsl_pool_t *dp; + dsl_dataset_t *ds; - error = zfs_secpolicy_destroy_perms(dsname, cr); + error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp); + if (error != 0) + break; + nextpair = nvlist_next_nvpair(snaps, pair); + error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds); + if (error == 0) + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); - if (error == ENOENT) - error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); + if (error == 0) { + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), + cr); + } else if (error == ENOENT) { + /* + * Ignore any snapshots that don't exist (we consider + * them "already destroyed"). Remove the name from the + * nvl here in case the snapshot is created between + * now and when we try to destroy it (in which case + * we don't want to destroy it since we haven't + * checked for permission). + */ + fnvlist_remove_nvpair(snaps, pair); + error = 0; + } + if (error != 0) + break; + } - strfree(dsname); return (error); } @@ -777,8 +913,9 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char *at = NULL; int error; @@ -802,49 +939,57 @@ zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - char parentname[MAXNAMELEN]; - objset_t *clone; + dsl_pool_t *dp; + dsl_dataset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); - if (error) + if (error != 0) return (error); - error = dmu_objset_hold(zc->zc_name, FTAG, &clone); + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone); if (error == 0) { - dsl_dataset_t *pclone = NULL; + char parentname[MAXNAMELEN]; + dsl_dataset_t *origin = NULL; dsl_dir_t *dd; - dd = clone->os_dsl_dataset->ds_dir; + dd = clone->ds_dir; - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_origin_obj, FTAG, &pclone); - rw_exit(&dd->dd_pool->dp_config_rwlock); - if (error) { - dmu_objset_rele(clone, FTAG); + dd->dd_phys->dd_origin_obj, FTAG, &origin); + if (error != 0) { + dsl_dataset_rele(clone, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } - error = zfs_secpolicy_write_perms(zc->zc_name, + error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone, ZFS_DELEG_PERM_MOUNT, cr); - dsl_dataset_name(pclone, parentname); - dmu_objset_rele(clone, FTAG); - dsl_dataset_rele(pclone, FTAG); - if (error == 0) - error = zfs_secpolicy_write_perms(parentname, + dsl_dataset_name(origin, parentname); + if (error == 0) { + error = zfs_secpolicy_write_perms_ds(parentname, origin, ZFS_DELEG_PERM_PROMOTE, cr); + } + dsl_dataset_rele(clone, FTAG); + dsl_dataset_rele(origin, FTAG); } + dsl_pool_rele(dp, FTAG); return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -867,49 +1012,72 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ZFS_DELEG_PERM_SNAPSHOT, cr)); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + nvlist_t *snaps; + int error; + nvpair_t *pair; + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (SET_ERROR(EINVAL)); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *name = nvpair_name(pair); + char *atp = strchr(name, '@'); + + if (atp == NULL) { + error = SET_ERROR(EINVAL); + break; + } + *atp = '\0'; + error = zfs_secpolicy_snapshot_perms(name, cr); + *atp = '@'; + if (error != 0) + break; + } + return (error); +} - return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); +/* ARGSUSED */ +static int +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + /* + * Even root must have a proper TSD so that we know what pool + * to log to. + */ + if (tsd_get(zfs_allow_log_key) == NULL) + return (SET_ERROR(EPERM)); + return (0); } static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; int error; + char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (zc->zc_value[0] != '\0') { - if ((error = zfs_secpolicy_write_perms(zc->zc_value, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); - } + if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && + (error = zfs_secpolicy_write_perms(origin, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - error = zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr); - - return (error); -} - -static int -zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) -{ - int error; - - error = secpolicy_fs_unmount(cr, NULL); - if (error) { - error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); - } - return (error); + return (zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)); } /* @@ -918,10 +1086,10 @@ zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) - return (EPERM); + return (SET_ERROR(EPERM)); return (0); } @@ -931,7 +1099,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -947,19 +1115,20 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (secpolicy_zinject(cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); if (prop == ZPROP_INVAL) { if (!zfs_prop_user(zc->zc_value)) - return (EINVAL); + return (SET_ERROR(EINVAL)); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_USERPROP, cr)); } else { @@ -969,14 +1138,14 @@ zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (zc->zc_value[0] == 0) { /* @@ -998,45 +1167,79 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) - return (EINVAL); + return (SET_ERROR(EINVAL)); return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_HOLD, cr)); + nvpair_t *pair; + nvlist_t *holds; + int error; + + error = nvlist_lookup_nvlist(innvl, "holds", &holds); + if (error != 0) + return (SET_ERROR(EINVAL)); + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + char fsname[MAXNAMELEN]; + error = dmu_fsname(nvpair_name(pair), fsname); + if (error != 0) + return (error); + error = zfs_secpolicy_write_perms(fsname, + ZFS_DELEG_PERM_HOLD, cr); + if (error != 0) + return (error); + } + return (0); } +/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_RELEASE, cr)); + nvpair_t *pair; + int error; + + for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(innvl, pair)) { + char fsname[MAXNAMELEN]; + error = dmu_fsname(nvpair_name(pair), fsname); + if (error != 0) + return (error); + error = zfs_secpolicy_write_perms(fsname, + ZFS_DELEG_PERM_RELEASE, cr); + if (error != 0) + return (error); + } + return (0); } /* * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, @@ -1049,13 +1252,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot(zc, cr); - if (!error) - error = zfs_secpolicy_hold(zc, cr); - if (!error) - error = zfs_secpolicy_release(zc, cr); - if (!error) - error = zfs_secpolicy_destroy(zc, cr); + error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); + if (error == 0) + error = zfs_secpolicy_hold(zc, innvl, cr); + if (error == 0) + error = zfs_secpolicy_release(zc, innvl, cr); + if (error == 0) + error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } @@ -1073,7 +1276,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) * Read in and unpack the user-supplied nvlist. */ if (size == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); packed = kmem_alloc(size, KM_SLEEP); @@ -1094,36 +1297,40 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) return (0); } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. + */ static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max) { size_t size; - VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(errors); - if (size > zc->zc_nvlist_dst_size) { + if (size > max) { nvpair_t *more_errors; int n = 0; - if (zc->zc_nvlist_dst_size < 1024) - return (ENOMEM); + if (max < 1024) + return (SET_ERROR(ENOMEM)); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); - more_errors = nvlist_prev_nvpair(*errors, NULL); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); + more_errors = nvlist_prev_nvpair(errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(*errors, + nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); - VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + fnvlist_remove_nvpair(errors, pair); n++; - VERIFY(nvlist_size(*errors, &size, - NV_ENCODE_NATIVE) == 0); - } while (size > zc->zc_nvlist_dst_size); + size = fnvlist_size(errors); + } while (size > max); - VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); - ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - ASSERT(size <= zc->zc_nvlist_dst_size); + fnvlist_remove_nvpair(errors, more_errors); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); + ASSERT3U(fnvlist_size(errors), <=, max); } return (0); @@ -1136,7 +1343,7 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) int error = 0; size_t size; - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { /* @@ -1149,16 +1356,15 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) */ error = 0; } else { - packed = kmem_alloc(size, KM_SLEEP); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); + packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) - error = EFAULT; - kmem_free(packed, size); + error = SET_ERROR(EFAULT); + fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; + zc->zc_nvlist_dst_filled = B_TRUE; return (error); } @@ -1169,11 +1375,11 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) int error; error = dmu_objset_hold(dsname, FTAG, &os); - if (error) + if (error != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); - return (EINVAL); + return (SET_ERROR(EINVAL)); } mutex_enter(&os->os_user_ptr_lock); @@ -1181,7 +1387,7 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) if (*zfvp) { VFS_HOLD((*zfvp)->z_vfs); } else { - error = ESRCH; + error = SET_ERROR(ESRCH); } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); @@ -1211,7 +1417,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) * objset from the zfsvfs. */ rrw_exit(&(*zfvp)->z_teardown_lock, tag); - return (EBUSY); + return (SET_ERROR(EBUSY)); } } return (error); @@ -1237,7 +1443,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; - char *buf; if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) @@ -1257,7 +1462,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); if (!SPA_VERSION_IS_SUPPORTED(version)) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto pool_props_bad; } (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); @@ -1273,13 +1478,11 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); - if (error) + if (error != 0) goto pool_props_bad; } - buf = history_str_get(zc); - - error = spa_create(zc->zc_name, config, props, buf, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops); /* * Set the remaining root properties @@ -1288,9 +1491,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); - if (buf != NULL) - history_str_free(buf); - pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -1331,7 +1531,7 @@ zfs_ioc_pool_import(zfs_cmd_t *zc) if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || guid != zc->zc_guid) - error = EINVAL; + error = SET_ERROR(EINVAL); else error = spa_import(zc->zc_name, config, props, zc->zc_cookie); @@ -1371,7 +1571,7 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc) int error; if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) - return (EEXIST); + return (SET_ERROR(EEXIST)); error = put_nvlist(zc, configs); @@ -1435,7 +1635,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc) nvlist_free(tryconfig); if (config == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); error = put_nvlist(zc, config); nvlist_free(config); @@ -1493,7 +1693,7 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc) if (zc->zc_cookie < spa_version(spa) || !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) { spa_close(spa, FTAG); - return (EINVAL); + return (SET_ERROR(EINVAL)); } spa_upgrade(spa, zc->zc_cookie); @@ -1511,14 +1711,14 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc) int error; if ((size = zc->zc_history_len) == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } hist_buf = kmem_alloc(size, KM_SLEEP); @@ -1551,12 +1751,7 @@ zfs_ioc_pool_reguid(zfs_cmd_t *zc) static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { - int error; - - if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value)) - return (error); - - return (0); + return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value)); } /* @@ -1578,7 +1773,7 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); - return (EINVAL); + return (SET_ERROR(EINVAL)); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); @@ -1607,7 +1802,7 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); - return (EINVAL); + return (SET_ERROR(EINVAL)); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); @@ -1649,7 +1844,7 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc) if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { nvlist_free(config); spa_close(spa, FTAG); - return (EDOM); + return (SET_ERROR(EDOM)); } if (error == 0) { @@ -1715,7 +1910,7 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc) break; default: - error = EINVAL; + error = SET_ERROR(EINVAL); } zc->zc_cookie = newstate; spa_close(spa, FTAG); @@ -1872,15 +2067,14 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { - objset_t *os = NULL; + objset_t *os; int error; - if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) - return (error); - - error = zfs_ioc_objset_stats_impl(zc, os); - - dmu_objset_rele(os, FTAG); + error = dmu_objset_hold(zc->zc_name, FTAG, &os); + if (error == 0) { + error = zfs_ioc_objset_stats_impl(zc, os); + dmu_objset_rele(os, FTAG); + } if (error == ENOMEM) error = 0; @@ -1903,30 +2097,23 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc) static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { - objset_t *os = NULL; - int error; + int error = 0; nvlist_t *nv; - if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) - return (error); - /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. */ - if (!dsl_prop_get_hasrecvd(os)) { - dmu_objset_rele(os, FTAG); - return (ENOTSUP); - } + if (!dsl_prop_get_hasrecvd(zc->zc_name)) + return (SET_ERROR(ENOTSUP)); if (zc->zc_nvlist_dst != 0 && - (error = dsl_prop_get_received(os, &nv)) == 0) { + (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } - dmu_objset_rele(os, FTAG); return (error); } @@ -1985,7 +2172,7 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc) err = put_nvlist(zc, nv); nvlist_free(nv); } else { - err = ENOENT; + err = SET_ERROR(ENOENT); } dmu_objset_rele(os, FTAG); return (err); @@ -2032,7 +2219,7 @@ zfs_ioc_dataset_list_next(zfs_cmd_t *zc) top: if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) { if (error == ENOENT) - error = ESRCH; + error = SET_ERROR(ESRCH); return (error); } @@ -2041,26 +2228,12 @@ top: (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); - /* - * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 - * but is not declared void because its called by dmu_objset_find(). - */ - if (zc->zc_cookie == 0) { - uint64_t cookie = 0; - int len = sizeof (zc->zc_name) - (p - zc->zc_name); - - while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { - if (!dataset_name_hidden(zc->zc_name)) - (void) dmu_objset_prefetch(zc->zc_name, NULL); - } - } - do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, NULL, &zc->zc_cookie); if (error == ENOENT) - error = ESRCH; + error = SET_ERROR(ESRCH); } while (error == 0 && dataset_name_hidden(zc->zc_name)); dmu_objset_rele(os, FTAG); @@ -2098,14 +2271,10 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) objset_t *os; int error; -top: - if (snapshot_list_prefetch && zc->zc_cookie == 0 && !zc->zc_simple) - (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, - NULL, DS_FIND_SNAPSHOTS); - error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) { return (error == ENOENT ? ESRCH : error); + } /* * A dataset name of maximum length cannot have any snapshots, @@ -2113,7 +2282,7 @@ top: */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { dmu_objset_rele(os, FTAG); - return (ESRCH); + return (SET_ERROR(ESRCH)); } error = dmu_snapshot_list_next(os, @@ -2125,24 +2294,8 @@ top: dsl_dataset_t *ds; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; - /* - * Since we probably don't have a hold on this snapshot, - * it's possible that the objsetid could have been destroyed - * and reused for a new objset. It's OK if this happens during - * a zfs send operation, since the new createtxg will be - * beyond the range we're interested in. - */ - rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) { - if (error == ENOENT) { - /* Racing with destroy, get the next one. */ - *strchr(zc->zc_name, '@') = '\0'; - dmu_objset_rele(os, FTAG); - goto top; - } - } else { + if (error == 0) { objset_t *ossnap; error = dmu_objset_from_ds(ds, &ossnap); @@ -2151,12 +2304,12 @@ top: dsl_dataset_rele(ds, FTAG); } } else if (error == ENOENT) { - error = ESRCH; + error = SET_ERROR(ESRCH); } dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ - if (error) + if (error != 0) *strchr(zc->zc_name, '@') = '\0'; return (error); } @@ -2180,7 +2333,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -2190,7 +2343,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) if ((dash = strchr(propname, '-')) == NULL || nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || vallen != 3) - return (EINVAL); + return (SET_ERROR(EINVAL)); domain = dash + 1; type = valary[0]; @@ -2246,13 +2399,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: - err = dsl_dataset_set_quota(dsname, source, intval); + err = dsl_dataset_set_refquota(dsname, source, intval); break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: - err = dsl_dataset_set_reservation(dsname, source, intval); + err = dsl_dataset_set_refreservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip), @@ -2284,19 +2437,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, zfeature_info_t *feature = &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS]; spa_t *spa; - dsl_pool_t *dp; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); - dp = spa->spa_dsl_pool; - /* * Setting the LZ4 compression algorithm activates * the feature. */ if (!spa_feature_is_active(spa, feature)) { - if ((err = zfs_prop_activate_feature(dp, + if ((err = zfs_prop_activate_feature(spa, feature)) != 0) { spa_close(spa, FTAG); return (err); @@ -2322,31 +2472,25 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers. * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t **errlist) + nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl; - nvlist_t *errors; - nvlist_t *retrynvl; - - VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nvlist_t *genericnvl = fnvlist_alloc(); + nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; @@ -2359,51 +2503,50 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) - err = EINVAL; + err = SET_ERROR(EINVAL); } /* Validate value type */ if (err == 0 && prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if (nvpair_type(propval) != DATA_TYPE_STRING) - err = EINVAL; + err = SET_ERROR(EINVAL); } else if (zfs_prop_userquota(propname)) { if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) - err = EINVAL; + err = SET_ERROR(EINVAL); } else { - err = EINVAL; + err = SET_ERROR(EINVAL); } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) - err = EINVAL; + err = SET_ERROR(EINVAL); } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: break; case PROP_TYPE_STRING: - err = EINVAL; + err = SET_ERROR(EINVAL); break; case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) - err = EINVAL; + err = SET_ERROR(EINVAL); break; default: cmn_err(CE_PANIC, "unknown property type"); } } else { - err = EINVAL; + err = SET_ERROR(EINVAL); } } @@ -2429,8 +2572,11 @@ retry: } } - if (err != 0) - VERIFY(nvlist_add_int32(errors, propname, err) == 0); + if (err != 0) { + if (errlist != NULL) + fnvlist_add_int32(errlist, propname, err); + rv = err; + } } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2452,44 +2598,33 @@ retry: propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &propval) == 0); + attrs = fnvpair_value_nvlist(pair); + propval = fnvlist_lookup_nvpair(attrs, + ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(propval, - &strval) == 0); - err = dsl_prop_set(dsname, propname, source, 1, - strlen(strval) + 1, strval); + strval = fnvpair_value_string(propval); + err = dsl_prop_set_string(dsname, propname, + source, strval); } else { - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); - err = dsl_prop_set(dsname, propname, source, 8, - 1, &intval); + intval = fnvpair_value_uint64(propval); + err = dsl_prop_set_int(dsname, propname, source, + intval); } if (err != 0) { - VERIFY(nvlist_add_int32(errors, propname, - err) == 0); + if (errlist != NULL) { + fnvlist_add_int32(errlist, propname, + err); + } + rv = err; } } } nvlist_free(genericnvl); nvlist_free(retrynvl); - if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { - nvlist_free(errors); - errors = NULL; - } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); - } - - if (errlist == NULL) - nvlist_free(errors); - else - *errlist = errors; - return (rv); } @@ -2497,7 +2632,7 @@ retry: * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; @@ -2508,14 +2643,14 @@ zfs_check_userprops(char *fsname, nvlist_t *nvl) if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_USERPROP, CRED())) return (error); if (strlen(propname) >= ZAP_MAXNAMELEN) - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); VERIFY(nvpair_value_string(pair, &valstr) == 0); if (strlen(valstr) >= ZAP_MAXVALUELEN) @@ -2541,7 +2676,7 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) } static int -clear_received_props(objset_t *os, const char *fs, nvlist_t *props, +clear_received_props(const char *dsname, nvlist_t *props, nvlist_t *skipped) { int err = 0; @@ -2553,8 +2688,8 @@ clear_received_props(objset_t *os, const char *fs, nvlist_t *props, * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | - (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); - err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); + (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0)); + err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); @@ -2577,7 +2712,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors = NULL; + nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2586,21 +2721,19 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) if (received) { nvlist_t *origprops; - objset_t *os; - - if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { - if (dsl_prop_get_received(os, &origprops) == 0) { - (void) clear_received_props(os, - zc->zc_name, origprops, nvl); - nvlist_free(origprops); - } - dsl_prop_set_hasrecvd(os); - dmu_objset_rele(os, FTAG); + if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) { + (void) clear_received_props(zc->zc_name, + origprops, nvl); + nvlist_free(origprops); } + + error = dsl_prop_set_hasrecvd(zc->zc_name); } - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + errors = fnvlist_alloc(); + if (error == 0) + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2641,12 +2774,12 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) */ if (prop == ZPROP_INVAL) { if (!zfs_prop_user(propname)) - return (EINVAL); + return (SET_ERROR(EINVAL)); type = PROP_TYPE_STRING; } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) { - return (EINVAL); + return (SET_ERROR(EINVAL)); } else { type = zfs_prop_get_type(prop); } @@ -2663,7 +2796,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) break; default: nvlist_free(dummy); - return (EINVAL); + return (SET_ERROR(EINVAL)); } pair = nvlist_next_nvpair(dummy, NULL); @@ -2679,11 +2812,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) * they are not considered inheritable. */ if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) - return (EINVAL); + return (SET_ERROR(EINVAL)); } - /* the property name has been validated by zfs_secpolicy_inherit() */ - return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); + /* property name has been validated by zfs_secpolicy_inherit_prop() */ + return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source)); } static int @@ -2756,7 +2889,7 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc) if (error == 0 && zc->zc_nvlist_dst != 0) error = put_nvlist(zc, nvp); else - error = EFAULT; + error = SET_ERROR(EFAULT); nvlist_free(nvp); return (error); @@ -2785,7 +2918,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) */ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { nvlist_free(fsaclnv); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -2795,7 +2928,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) */ error = secpolicy_zfs(CRED()); - if (error) { + if (error != 0) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); @@ -2928,7 +3061,7 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, (zplver < ZPL_VERSION_NORMALIZATION && (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || sense != ZFS_PROP_UNDEFINED))) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); /* * Put the version in the zplprops @@ -3021,26 +3154,30 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, } /* - * inputs: - * zc_objset_type type of objset to create (fs vs zvol) - * zc_name name of new objset - * zc_value name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size} nvlist of properties to apply + * innvl: { + * "type" -> dmu_objset_type_t (int32) + * (optional) "props" -> { prop -> value } + * } * - * outputs: none + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *clone; int error = 0; - zfs_creat_t zct; + zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - dmu_objset_type_t type = zc->zc_objset_type; + int32_t type32; + dmu_objset_type_t type; + boolean_t is_insensitive = B_FALSE; - switch (type) { + if (nvlist_lookup_int32(innvl, "type", &type32) != 0) + return (SET_ERROR(EINVAL)); + type = type32; + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; @@ -3053,253 +3190,333 @@ zfs_ioc_create(zfs_cmd_t *zc) cbfunc = NULL; break; } - if (strchr(zc->zc_name, '@') || - strchr(zc->zc_name, '%')) - return (EINVAL); - - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (SET_ERROR(EINVAL)); - zct.zct_zplprops = NULL; zct.zct_props = nvprops; - if (zc->zc_value[0] != '\0') { - /* - * We're creating a clone of an existing snapshot. - */ - zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - - error = dmu_objset_hold(zc->zc_value, FTAG, &clone); - if (error) { - nvlist_free(nvprops); - return (error); - } - - error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); - if (error) { - nvlist_free(nvprops); - return (error); - } - } else { - boolean_t is_insensitive = B_FALSE; + if (cbfunc == NULL) + return (SET_ERROR(EINVAL)); - if (cbfunc == NULL) { - nvlist_free(nvprops); - return (EINVAL); - } + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; + if (nvprops == NULL) + return (SET_ERROR(EINVAL)); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) + return (SET_ERROR(EINVAL)); - if (nvprops == NULL || - nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) + return (SET_ERROR(EINVAL)); - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) { - nvlist_free(nvprops); - return (EINVAL); - } + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); - - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) { - nvlist_free(nvprops); - return (error); - } - } else if (type == DMU_OST_ZFS) { - int error; + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) + return (error); + } else if (type == DMU_OST_ZFS) { + int error; - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. - */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(zc->zc_name, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(nvprops); - nvlist_free(zct.zct_zplprops); - return (error); - } + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(fsname, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); } - error = dmu_objset_create(zc->zc_name, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); } + error = dmu_objset_create(fsname, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + /* * It would be nice to do this atomically. */ if (error == 0) { - error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, - nvprops, NULL); + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + (void) dsl_destroy_head(fsname); } - nvlist_free(nvprops); #ifdef __FreeBSD__ if (error == 0 && type == DMU_OST_ZVOL) - zvol_create_minors(zc->zc_name); + zvol_create_minors(fsname); #endif return (error); } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot - * zc_cookie recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + * "origin" -> name of origin snapshot + * (optional) "props" -> { prop -> value } + * } * - * outputs: - * zc_value short snapname (i.e. part after the '@') + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { + int error = 0; nvlist_t *nvprops = NULL; - int error; - boolean_t recursive = zc->zc_cookie; + char *origin_name; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); + if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) + return (SET_ERROR(EINVAL)); + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (SET_ERROR(EINVAL)); - error = zfs_check_userprops(zc->zc_name, nvprops); - if (error) - goto out; + if (dataset_namecheck(origin_name, NULL, NULL) != 0) + return (SET_ERROR(EINVAL)); + error = dmu_objset_clone(fsname, origin_name); + if (error != 0) + return (error); - if (!nvlist_empty(nvprops) && - zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { - error = ENOTSUP; - goto out; + /* + * It would be nice to do this atomically. + */ + if (error == 0) { + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); + if (error != 0) + (void) dsl_destroy_head(fsname); } - - error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, - nvprops, recursive, B_FALSE, -1); - -out: - nvlist_free(nvprops); return (error); } -int -zfs_unmount_snap(const char *name, void *arg) +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32) + */ +static int +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - vfs_t *vfsp = NULL; + nvlist_t *snaps; + nvlist_t *props = NULL; + int error, poollen; + nvpair_t *pair; - if (arg) { - char *snapname = arg; - char *fullname = kmem_asprintf("%s@%s", name, snapname); - vfsp = zfs_get_vfs(fullname); - strfree(fullname); - } else if (strchr(name, '@')) { - vfsp = zfs_get_vfs(name); - } + (void) nvlist_lookup_nvlist(innvl, "props", &props); + if ((error = zfs_check_userprops(poolname, props)) != 0) + return (error); + + if (!nvlist_empty(props) && + zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) + return (SET_ERROR(ENOTSUP)); + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (SET_ERROR(EINVAL)); + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + const char *name = nvpair_name(pair); + const char *cp = strchr(name, '@'); - if (vfsp) { /* - * Always force the unmount for snapshots. + * The snap name must contain an @, and the part after it must + * contain only valid characters. */ - int flag = MS_FORCE; - int err; + if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + return (SET_ERROR(EINVAL)); - if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { - VFS_RELE(vfsp); - return (err); + /* + * The snap must be in the specified pool. + */ + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (SET_ERROR(EXDEV)); + + /* This must be the only snap of this fs. */ + for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { + if (strncmp(name, nvpair_name(pair2), cp - name + 1) + == 0) { + return (SET_ERROR(EXDEV)); + } } + } + + error = dsl_dataset_snapshot(snaps, props, outnvl); + return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char *message; + spa_t *spa; + int error; + char *poolname; + + /* + * The poolname in the ioctl is not set, we get it from the TSD, + * which was set at the end of the last successful ioctl that allows + * logging. The secpolicy func already checked that it is set. + * Only one log ioctl is allowed after each successful ioctl, so + * we clear the TSD here. + */ + poolname = tsd_get(zfs_allow_log_key); + (void) tsd_set(zfs_allow_log_key, NULL); + error = spa_open(poolname, &spa, FTAG); + strfree(poolname); + if (error != 0) + return (error); + + if (nvlist_lookup_string(innvl, "message", &message) != 0) { + spa_close(spa, FTAG); + return (SET_ERROR(EINVAL)); + } + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } + + error = spa_history_log(spa, message); + spa_close(spa, FTAG); + return (error); +} + +/* + * The dp_config_rwlock must not be held when calling this, because the + * unmount may need to write out data. + * + * This function is best-effort. Callers must deal gracefully if it + * remains mounted (or is remounted after this call). + */ +void +zfs_unmount_snap(const char *snapname) +{ + vfs_t *vfsp; + zfsvfs_t *zfsvfs; + + if (strchr(snapname, '@') == NULL) + return; + + vfsp = zfs_get_vfs(snapname); + if (vfsp == NULL) + return; + + zfsvfs = vfsp->vfs_data; + ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); + + if (vn_vfswlock(vfsp->vfs_vnodecovered) != 0) { VFS_RELE(vfsp); - mtx_lock(&Giant); /* dounmount() */ - dounmount(vfsp, flag, curthread); - mtx_unlock(&Giant); /* dounmount() */ + return; } + VFS_RELE(vfsp); + + /* + * Always force the unmount for snapshots. + */ + +#ifdef illumos + (void) dounmount(vfsp, MS_FORCE, kcred); +#else + mtx_lock(&Giant); /* dounmount() */ + dounmount(vfsp, MS_FORCE, curthread); + mtx_unlock(&Giant); /* dounmount() */ +#endif +} + +/* ARGSUSED */ +static int +zfs_unmount_snap_cb(const char *snapname, void *arg) +{ + zfs_unmount_snap(snapname); return (0); } /* - * inputs: - * zc_name name of filesystem, snaps must be under it - * zc_nvlist_src[_size] full names of snapshots to destroy - * zc_defer_destroy mark for deferred destroy + * When a clone is destroyed, its origin may also need to be destroyed, + * in which case it must be unmounted. This routine will do that unmount + * if necessary. + */ +void +zfs_destroy_unmount_origin(const char *fsname) +{ + int error; + objset_t *os; + dsl_dataset_t *ds; + + error = dmu_objset_hold(fsname, FTAG, &os); + if (error != 0) + return; + ds = dmu_objset_ds(os); + if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) { + char originname[MAXNAMELEN]; + dsl_dataset_name(ds->ds_prev, originname); + dmu_objset_rele(os, FTAG); + zfs_unmount_snap(originname); + } else { + dmu_objset_rele(os, FTAG); + } +} + +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional boolean) "defer" + * } + * + * outnvl: snapshot -> error code (int32) * - * outputs: - * zc_name on failure, name of failed snapshot */ static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int err, len; - nvlist_t *nvl; + int poollen; + nvlist_t *snaps; nvpair_t *pair; + boolean_t defer; - if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvl)) != 0) { -#ifndef __FreeBSD__ - return (err); -#else - /* - * We are probably called by older binaries, - * allocate and populate nvlist with recursive snapshots - */ - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - err = dmu_get_recursive_snaps_nvl(zc->zc_name, - zc->zc_value, nvl); - if (err) { - nvlist_free(nvl); - return (err); - } -#endif /* __FreeBSD__ */ - } + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (SET_ERROR(EINVAL)); + defer = nvlist_exists(innvl, "defer"); - len = strlen(zc->zc_name); - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); + /* - * The snap name must be underneath the zc_name. This ensures - * that our permission checks were legitimate. + * The snap must be in the specified pool. */ - if (strncmp(zc->zc_name, name, len) != 0 || - (name[len] != '@' && name[len] != '/')) { - nvlist_free(nvl); - return (EINVAL); - } + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (SET_ERROR(EXDEV)); - (void) zfs_unmount_snap(name, NULL); + zfs_unmount_snap(name); (void) zvol_remove_minor(name); } - err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, - zc->zc_name); - nvlist_free(nvl); - return (err); + return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); } /* @@ -3314,13 +3531,13 @@ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; - if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { - err = zfs_unmount_snap(zc->zc_name, NULL); - if (err) - return (err); - } + if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) + zfs_unmount_snap(zc->zc_name); - err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); + if (strchr(zc->zc_name, '@')) + err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy); + else + err = dsl_destroy_head(zc->zc_name); if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) (void) zvol_remove_minor(zc->zc_name); return (err); @@ -3335,79 +3552,34 @@ zfs_ioc_destroy(zfs_cmd_t *zc) static int zfs_ioc_rollback(zfs_cmd_t *zc) { - dsl_dataset_t *ds, *clone; - int error; zfsvfs_t *zfsvfs; - char *clone_name; - - error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); - if (error) - return (error); - - /* must not be a snapshot */ - if (dsl_dataset_is_snapshot(ds)) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* must have a most recent snapshot */ - if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* - * Create clone of most recent snapshot. - */ - clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); - error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); - if (error) - goto out; - - error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); - if (error) - goto out; + int error; - /* - * Do clone swap. - */ if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { error = zfs_suspend_fs(zfsvfs); if (error == 0) { int resume_err; - if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { - error = dsl_dataset_clone_swap(clone, ds, - B_TRUE); - dsl_dataset_disown(ds, FTAG); - ds = NULL; - } else { - error = EBUSY; - } + error = dsl_dataset_rollback(zc->zc_name); resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); error = error ? error : resume_err; } VFS_RELE(zfsvfs->z_vfs); } else { - if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { - error = dsl_dataset_clone_swap(clone, ds, B_TRUE); - dsl_dataset_disown(ds, FTAG); - ds = NULL; - } else { - error = EBUSY; - } + error = dsl_dataset_rollback(zc->zc_name); } + return (error); +} - /* - * Destroy clone (which also closes it). - */ - (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); +static int +recursive_unmount(const char *fsname, void *arg) +{ + const char *snapname = arg; + char fullname[MAXNAMELEN]; -out: - strfree(clone_name); - if (ds) - dsl_dataset_rele(ds, FTAG); - return (error); + (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname); + zfs_unmount_snap(fullname); + return (0); } /* @@ -3421,31 +3593,43 @@ out: static int zfs_ioc_rename(zfs_cmd_t *zc) { - int flags = 0; - - if (zc->zc_cookie & 1) - flags |= ZFS_RENAME_RECURSIVE; - if (zc->zc_cookie & 2) - flags |= ZFS_RENAME_ALLOW_MOUNTED; + boolean_t recursive = zc->zc_cookie & 1; +#ifdef __FreeBSD__ + boolean_t allow_mounted = zc->zc_cookie & 2; +#endif + char *at; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '%')) - return (EINVAL); + return (SET_ERROR(EINVAL)); - /* - * Unmount snapshot unless we're doing a recursive rename, - * in which case the dataset code figures out which snapshots - * to unmount. - */ - if (!(flags & ZFS_RENAME_RECURSIVE) && - strchr(zc->zc_name, '@') != NULL && - zc->zc_objset_type == DMU_OST_ZFS) { - int err = zfs_unmount_snap(zc->zc_name, NULL); - if (err) - return (err); + at = strchr(zc->zc_name, '@'); + if (at != NULL) { + /* snaps must be in same fs */ + if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1)) + return (SET_ERROR(EXDEV)); + *at = '\0'; +#ifdef illumos + if (zc->zc_objset_type == DMU_OST_ZFS) { +#else + if (zc->zc_objset_type == DMU_OST_ZFS && allow_mounted) { +#endif + int error = dmu_objset_find(zc->zc_name, + recursive_unmount, at + 1, + recursive ? DS_FIND_CHILDREN : 0); + if (error != 0) + return (error); + } + return (dsl_dataset_rename_snapshot(zc->zc_name, + at + 1, strchr(zc->zc_value, '@') + 1, recursive)); + } else { +#ifdef illumos + if (zc->zc_objset_type == DMU_OST_ZVOL) + (void) zvol_remove_minor(zc->zc_name); +#endif + return (dsl_dir_rename(zc->zc_name, zc->zc_value)); } - return (dmu_objset_rename(zc->zc_name, zc->zc_value, flags)); } static int @@ -3480,7 +3664,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) perm = ZFS_DELEG_PERM_GROUPQUOTA; } else { /* USERUSED and GROUPUSED are read-only */ - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (err = zfs_secpolicy_write_perms(dsname, perm, cr)) @@ -3488,11 +3672,11 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (0); } - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (issnap) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* @@ -3521,13 +3705,13 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, SPA_VERSION_GZIP_COMPRESSION)) { - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } if (intval == ZIO_COMPRESS_ZLE && zfs_earlier_version(dsname, SPA_VERSION_ZLE_COMPRESSION)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); if (intval == ZIO_COMPRESS_LZ4) { zfeature_info_t *feature = @@ -3540,7 +3724,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) if (!spa_feature_is_enabled(spa, feature)) { spa_close(spa, FTAG); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } @@ -3554,24 +3738,24 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) */ if (zfs_is_bootfs(dsname) && !BOOTFS_COMPRESS_VALID(intval)) { - return (ERANGE); + return (SET_ERROR(ERANGE)); } } break; case ZFS_PROP_COPIES: if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); break; case ZFS_PROP_DEDUP: if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); break; case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); break; case ZFS_PROP_ACLINHERIT: @@ -3580,7 +3764,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) if (intval == ZFS_ACL_PASSTHROUGH_X && zfs_earlier_version(dsname, SPA_VERSION_PASSTHROUGH_X)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } break; } @@ -3588,41 +3772,20 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } -/* - * Activates a feature on a pool in response to a property setting. This - * creates a new sync task which modifies the pool to reflect the feature - * as being active. - */ -static int -zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature) -{ - int err; - - /* EBUSY here indicates that the feature is already active */ - err = dsl_sync_task_do(dp, zfs_prop_activate_feature_check, - zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2); - - if (err != 0 && err != EBUSY) - return (err); - else - return (0); -} - /* * Checks for a race condition to make sure we don't increment a feature flag * multiple times. */ -/*ARGSUSED*/ static int -zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx) +zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; if (!spa_feature_is_active(spa, feature)) return (0); else - return (EBUSY); + return (SET_ERROR(EBUSY)); } /* @@ -3630,14 +3793,35 @@ zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx) * zfs_prop_activate_feature. */ static void -zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx) +zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_incr(spa, feature, tx); } +/* + * Activates a feature on a pool in response to a property setting. This + * creates a new sync task which modifies the pool to reflect the feature + * as being active. + */ +static int +zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature) +{ + int err; + + /* EBUSY here indicates that the feature is already active */ + err = dsl_sync_task(spa_name(spa), + zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync, + feature, 2); + + if (err != 0 && err != EBUSY) + return (err); + else + return (0); +} + /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. For each @@ -3673,7 +3857,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); @@ -3792,7 +3976,6 @@ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; - objset_t *os; dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; @@ -3802,7 +3985,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) offset_t off; nvlist_t *props = NULL; /* sent properties */ nvlist_t *origprops = NULL; /* existing properties */ - objset_t *origin = NULL; + char *origin = NULL; char *tosnap; char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; @@ -3810,7 +3993,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) - return (EINVAL); + return (SET_ERROR(EINVAL)); (void) strcpy(tofs, zc->zc_value); tosnap = strchr(tofs, '@'); @@ -3825,23 +4008,36 @@ zfs_ioc_recv(zfs_cmd_t *zc) fp = getf(fd); if (fp == NULL) { nvlist_free(props); - return (EBADF); + return (SET_ERROR(EBADF)); } VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { - if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && - !dsl_prop_get_hasrecvd(os)) { - first_recvd_props = B_TRUE; - } + if (zc->zc_string[0]) + origin = zc->zc_string; - /* - * If new received properties are supplied, they are to - * completely replace the existing received properties, so stash - * away the existing ones. - */ - if (dsl_prop_get_received(os, &origprops) == 0) { + error = dmu_recv_begin(tofs, tosnap, + &zc->zc_begin_record, force, origin, &drc); + if (error != 0) + goto out; + + /* + * Set properties before we receive the stream so that they are applied + * to the new data. Note that we must call dmu_recv_stream() if + * dmu_recv_begin() succeeds. + */ + if (props != NULL && !drc.drc_newfs) { + if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >= + SPA_VERSION_RECVD_PROPS && + !dsl_prop_get_hasrecvd(tofs)) + first_recvd_props = B_TRUE; + + /* + * If new received properties are supplied, they are to + * completely replace the existing received properties, so stash + * away the existing ones. + */ + if (dsl_prop_get_received(tofs, &origprops) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't @@ -3853,65 +4049,35 @@ zfs_ioc_recv(zfs_cmd_t *zc) */ if (!first_recvd_props) props_reduce(props, origprops); - if (zfs_check_clearable(tofs, origprops, - &errlist) != 0) + if (zfs_check_clearable(tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); - } - - dmu_objset_rele(os, FTAG); - } - - if (zc->zc_string[0]) { - error = dmu_objset_hold(zc->zc_string, FTAG, &origin); - if (error) - goto out; - } - error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, - &zc->zc_begin_record, force, origin, &drc); - if (origin) - dmu_objset_rele(origin, FTAG); - if (error) - goto out; - - /* - * Set properties before we receive the stream so that they are applied - * to the new data. Note that we must call dmu_recv_stream() if - * dmu_recv_begin() succeeds. - */ - if (props) { - nvlist_t *errlist; - - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { - if (drc.drc_newfs) { - if (spa_version(os->os_spa) >= - SPA_VERSION_RECVD_PROPS) - first_recvd_props = B_TRUE; - } else if (origprops != NULL) { - if (clear_received_props(os, tofs, origprops, - first_recvd_props ? NULL : props) != 0) - zc->zc_obj |= ZPROP_ERR_NOCLEAR; - } else { + if (clear_received_props(tofs, origprops, + first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; - } - dsl_prop_set_hasrecvd(os); - } else if (!drc.drc_newfs) { + } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } + } + + if (props != NULL) { + props_error = dsl_prop_set_hasrecvd(tofs); - (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, &errlist); - (void) nvlist_merge(errors, errlist, 0); - nvlist_free(errlist); + if (props_error == 0) { + (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, + props, errors); + } } - if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + if (zc->zc_nvlist_dst_size != 0 && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. */ - props_error = EINVAL; + props_error = SET_ERROR(EINVAL); } off = fp->f_offset; @@ -3959,22 +4125,16 @@ zfs_ioc_recv(zfs_cmd_t *zc) /* * On error, restore the original props. */ - if (error && props) { - if (dmu_objset_hold(tofs, FTAG, &os) == 0) { - if (clear_received_props(os, tofs, props, NULL) != 0) { - /* - * We failed to clear the received properties. - * Since we may have left a $recvd value on the - * system, we can't clear the $hasrecvd flag. - */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; - } else if (first_recvd_props) { - dsl_prop_unset_hasrecvd(os); - } - dmu_objset_rele(os, FTAG); - } else if (!drc.drc_newfs) { - /* We failed to clear the received properties. */ + if (error != 0 && props != NULL && !drc.drc_newfs) { + if (clear_received_props(tofs, props, NULL) != 0) { + /* + * We failed to clear the received properties. + * Since we may have left a $recvd value on the + * system, we can't clear the $hasrecvd flag. + */ zc->zc_obj |= ZPROP_ERR_NORESTORE; + } else if (first_recvd_props) { + dsl_prop_unset_hasrecvd(tofs); } if (origprops == NULL && !drc.drc_newfs) { @@ -4026,78 +4186,79 @@ out: static int zfs_ioc_send(zfs_cmd_t *zc) { - objset_t *fromsnap = NULL; - objset_t *tosnap; int error; offset_t off; - dsl_dataset_t *ds; - dsl_dataset_t *dsfrom = NULL; - spa_t *spa; - dsl_pool_t *dp; boolean_t estimate = (zc->zc_guid != 0); - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) - return (error); + if (zc->zc_obj != 0) { + dsl_pool_t *dp; + dsl_dataset_t *tosnap; - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) { - spa_close(spa, FTAG); - return (error); - } + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) + return (error); - error = dmu_objset_from_ds(ds, &tosnap); - if (error) { - dsl_dataset_rele(ds, FTAG); - spa_close(spa, FTAG); - return (error); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + + if (dsl_dir_is_clone(tosnap->ds_dir)) + zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj; + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); } - if (zc->zc_fromobj != 0) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) { - dsl_dataset_rele(ds, FTAG); + if (estimate) { + dsl_pool_t *dp; + dsl_dataset_t *tosnap; + dsl_dataset_t *fromsnap = NULL; + + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); - } - error = dmu_objset_from_ds(dsfrom, &fromsnap); - if (error) { - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); + + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); } - } else { - spa_close(spa, FTAG); - } - if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + if (zc->zc_fromobj != 0) { + error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, + FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (error); + } + } + + error = dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); + + if (fromsnap != NULL) + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); } else { file_t *fp = getf(zc->zc_cookie); - if (fp == NULL) { - dsl_dataset_rele(ds, FTAG); - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - return (EBADF); - } + if (fp == NULL) + return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, zc->zc_obj, - zc->zc_cookie, fp, &off); + error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, +#ifdef illumos + zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off); +#else + zc->zc_fromobj, zc->zc_cookie, fp, &off); +#endif if (off >= 0 && off <= MAXOFFSET_T) fp->f_offset = off; releasef(zc->zc_cookie); } - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); return (error); } @@ -4112,13 +4273,21 @@ zfs_ioc_send(zfs_cmd_t *zc) static int zfs_ioc_send_progress(zfs_cmd_t *zc) { + dsl_pool_t *dp; dsl_dataset_t *ds; dmu_sendarg_t *dsp = NULL; int error; - if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + mutex_enter(&ds->ds_sendstream_lock); /* @@ -4137,10 +4306,11 @@ zfs_ioc_send_progress(zfs_cmd_t *zc) if (dsp != NULL) zc->zc_cookie = *(dsp->dsa_off); else - error = ENOENT; + error = SET_ERROR(ENOENT); mutex_exit(&ds->ds_sendstream_lock); dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -4214,7 +4384,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) spa = spa_lookup(zc->zc_name); if (spa == NULL) { mutex_exit(&spa_namespace_lock); - return (EIO); + return (SET_ERROR(EIO)); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { /* we need to let spa_open/spa_load clear the chains */ @@ -4230,7 +4400,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) nvlist_t *config = NULL; if (zc->zc_nvlist_src == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { @@ -4247,7 +4417,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) } } - if (error) + if (error != 0) return (error); spa_vdev_state_enter(spa, SCL_NONE); @@ -4259,7 +4429,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) if (vd == NULL) { (void) spa_vdev_state_exit(spa, NULL, ENODEV); spa_close(spa, FTAG); - return (ENODEV); + return (SET_ERROR(ENODEV)); } } @@ -4271,7 +4441,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) * Resume any suspended I/Os. */ if (zio_resume(spa) != 0) - error = EIO; + error = SET_ERROR(EIO); spa_close(spa, FTAG); @@ -4285,7 +4455,7 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc) int error; error = spa_open(zc->zc_name, &spa, FTAG); - if (error) + if (error != 0) return (error); spa_vdev_state_enter(spa, SCL_NONE); @@ -4325,7 +4495,7 @@ zfs_ioc_promote(zfs_cmd_t *zc) if (cp) *cp = '\0'; (void) dmu_objset_find(zc->zc_value, - zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); + zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } @@ -4348,10 +4518,10 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc) int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) - return (EINVAL); + return (SET_ERROR(EINVAL)); error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); - if (error) + if (error != 0) return (error); error = zfs_userspace_one(zfsvfs, @@ -4379,10 +4549,10 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) int bufsize = zc->zc_nvlist_dst_size; if (bufsize <= 0) - return (ENOMEM); + return (SET_ERROR(ENOMEM)); int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); - if (error) + if (error != 0) return (error); void *buf = kmem_alloc(bufsize, KM_SLEEP); @@ -4432,7 +4602,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) return (error); error = dmu_objset_userspace_upgrade(os); @@ -4474,12 +4644,12 @@ zfs_init_sharefs() if (sharefs_mod == NULL && ((sharefs_mod = ddi_modopen("fs/sharefs", KRTLD_MODE_FIRST, &error)) == NULL)) { - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } if (zshare_fs == NULL && ((zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t)) ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) { - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } return (0); } @@ -4500,19 +4670,19 @@ zfs_ioc_share(zfs_cmd_t *zc) if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs", KRTLD_MODE_FIRST, &error)) == NULL)) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } if (znfsexport_fs == NULL && ((znfsexport_fs = (int (*)(void *)) ddi_modsym(nfs_mod, "nfs_export", &error)) == NULL)) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } error = zfs_init_sharefs(); - if (error) { + if (error != 0) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } zfs_nfsshare_inited = 1; mutex_exit(&zfs_share_lock); @@ -4526,25 +4696,25 @@ zfs_ioc_share(zfs_cmd_t *zc) ddi_modopen("drv/smbsrv", KRTLD_MODE_FIRST, &error)) == NULL)) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } if (zsmbexport_fs == NULL && ((zsmbexport_fs = (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod, "smb_server_share", &error)) == NULL)) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } error = zfs_init_sharefs(); - if (error) { + if (error != 0) { mutex_exit(&zfs_share_lock); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } zfs_smbshare_inited = 1; mutex_exit(&zfs_share_lock); } break; default: - return (EINVAL); + return (SET_ERROR(EINVAL)); } switch (zc->zc_share.z_sharetype) { @@ -4603,7 +4773,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, @@ -4620,31 +4790,32 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: + * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; + char *hold_name; int error; + minor_t minor; + + error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); + if (error != 0) + return (error); snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); + hold_name = kmem_asprintf("%%%s", zc->zc_value); - if (strlen(snap_name) >= MAXNAMELEN) { - strfree(snap_name); - return (E2BIG); - } - - error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, - NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); - if (error != 0) { - strfree(snap_name); - return (error); - } - - (void) strcpy(zc->zc_value, snap_name); + error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor, + hold_name); + if (error == 0) + (void) strcpy(zc->zc_value, snap_name); strfree(snap_name); - return (0); + strfree(hold_name); + zfs_onexit_fd_rele(zc->zc_cleanup_fd); + return (error); } /* @@ -4659,39 +4830,26 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) static int zfs_ioc_diff(zfs_cmd_t *zc) { - objset_t *fromsnap; - objset_t *tosnap; file_t *fp; offset_t off; int error; - error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); - if (error) - return (error); - - error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); - return (error); - } - fp = getf(zc->zc_cookie); - if (fp == NULL) { - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); - return (EBADF); - } + if (fp == NULL) + return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_diff(tosnap, fromsnap, fp, &off); +#ifdef illumos + error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off); +#else + error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off); +#endif if (off >= 0 && off <= MAXOFFSET_T) fp->f_offset = off; releasef(zc->zc_cookie); - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); return (error); } @@ -4744,7 +4902,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); - return (EINVAL); + return (SET_ERROR(EINVAL)); } dzp = VTOZ(vp); @@ -4763,13 +4921,13 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { + if (error != 0) { dmu_tx_abort(tx); } else { error = zfs_create_share_dir(zfsvfs, tx); dmu_tx_commit(tx); } - if (error) { + if (error != 0) { mutex_exit(&zfsvfs->z_lock); VN_RELE(vp); ZFS_EXIT(zfsvfs); @@ -4835,7 +4993,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) break; default: - error = EINVAL; + error = SET_ERROR(EINVAL); break; } @@ -4851,124 +5009,82 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snap - * zc_string user-supplied tag for this hold - * zc_cookie recursive flag - * zc_temphold set if hold is temporary - * zc_cleanup_fd cleanup-on-exit file descriptor for calling process - * zc_sendobj if non-zero, the objid for zc_name@zc_value - * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg + * innvl: { + * "holds" -> { snapname -> holdname (string), ... } + * (optional) "cleanup_fd" -> fd (int32) + * } * - * outputs: none + * outnvl: { + * snapname -> error value (int32) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_hold(zfs_cmd_t *zc) +zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist) { - boolean_t recursive = zc->zc_cookie; - spa_t *spa; - dsl_pool_t *dp; - dsl_dataset_t *ds; + nvlist_t *holds; + int cleanup_fd = -1; int error; minor_t minor = 0; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - - if (zc->zc_sendobj == 0) { - return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, - zc->zc_string, recursive, zc->zc_temphold, - zc->zc_cleanup_fd)); - } - - if (recursive) - return (EINVAL); - - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) - return (error); - - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) - return (error); - - /* - * Until we have a hold on this snapshot, it's possible that - * zc_sendobj could've been destroyed and reused as part - * of a later txg. Make sure we're looking at the right object. - */ - if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { - dsl_dataset_rele(ds, FTAG); - return (ENOENT); - } + error = nvlist_lookup_nvlist(args, "holds", &holds); + if (error != 0) + return (SET_ERROR(EINVAL)); - if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { - error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); - if (error) { - dsl_dataset_rele(ds, FTAG); + if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) { + error = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (error != 0) return (error); - } } - error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, - zc->zc_temphold); - if (minor != 0) { - if (error == 0) { - dsl_register_onexit_hold_cleanup(ds, zc->zc_string, - minor); - } - zfs_onexit_fd_rele(zc->zc_cleanup_fd); - } - dsl_dataset_rele(ds, FTAG); - + error = dsl_dataset_user_hold(holds, minor, errlist); + if (minor != 0) + zfs_onexit_fd_rele(cleanup_fd); return (error); } /* - * inputs: - * zc_name name of dataset from which we're releasing a user hold - * zc_value short name of snap - * zc_string user-supplied tag for this hold - * zc_cookie recursive flag + * innvl is not used. * - * outputs: none + * outnvl: { + * holdname -> time added (uint64 seconds since epoch) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_release(zfs_cmd_t *zc) +zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl) { - boolean_t recursive = zc->zc_cookie; - - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - - return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, - zc->zc_string, recursive)); + return (dsl_dataset_get_holds(snapname, outnvl)); } /* - * inputs: - * zc_name name of filesystem + * innvl: { + * snapname -> { holdname, ... } + * ... + * } * - * outputs: - * zc_nvlist_src{_size} nvlist of snapshot holds + * outnvl: { + * snapname -> error value (int32) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_get_holds(zfs_cmd_t *zc) +zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { - nvlist_t *nvp; - int error; + nvpair_t *pair; - if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { - error = put_nvlist(zc, nvp); - nvlist_free(nvp); - } + /* + * The release may cause the snapshot to be destroyed; make sure it + * is not mounted. + */ + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) + zfs_unmount_snap(nvpair_name(pair)); - return (error); + return (dsl_dataset_user_release(holds, errlist)); } /* @@ -4985,14 +5101,21 @@ static int zfs_ioc_space_written(zfs_cmd_t *zc) { int error; + dsl_pool_t *dp; dsl_dataset_t *new, *old; - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -5000,46 +5123,59 @@ zfs_ioc_space_written(zfs_cmd_t *zc) &zc->zc_objset_type, &zc->zc_perm_action); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } /* - * inputs: - * zc_name full name of last snapshot - * zc_value full name of first snapshot + * innvl: { + * "firstsnap" -> snapshot name + * } * - * outputs: - * zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes + * outnvl: { + * "used" -> space in bytes + * "compressed" -> compressed space in bytes + * "uncompressed" -> uncompressed space in bytes + * } */ static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; + dsl_pool_t *dp; dsl_dataset_t *new, *old; + char *firstsnap; + uint64_t used, comp, uncomp; + + if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) + return (SET_ERROR(EINVAL)); - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + error = dsl_pool_hold(lastsnap, FTAG, &dp); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + + error = dsl_dataset_hold(dp, lastsnap, FTAG, &new); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + error = dsl_dataset_hold(dp, firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } - error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); + error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); + fnvlist_add_uint64(outnvl, "used", used); + fnvlist_add_uint64(outnvl, "compressed", comp); + fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } -/* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. - */ static int zfs_ioc_jail(zfs_cmd_t *zc) { @@ -5056,138 +5192,390 @@ zfs_ioc_unjail(zfs_cmd_t *zc) (int)zc->zc_jailid)); } -static zfs_ioc_vec_t zfs_ioc_vec[] = { - { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_READONLY }, - { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY}, - { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, - B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_jail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_unjail, zfs_secpolicy_config, DATASET_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED }, -}; +/* + * innvl: { + * "fd" -> file descriptor to write stream to (int32) + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused + */ +/* ARGSUSED */ +static int +zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + offset_t off; + char *fromname = NULL; + int fd; + + error = nvlist_lookup_int32(innvl, "fd", &fd); + if (error != 0) + return (SET_ERROR(EINVAL)); + + (void) nvlist_lookup_string(innvl, "fromsnap", &fromname); + + file_t *fp = getf(fd); + if (fp == NULL) + return (SET_ERROR(EBADF)); + + off = fp->f_offset; +#ifdef illumos + error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off); +#else + error = dmu_send(snapname, fromname, fd, fp, &off); +#endif + +#ifdef illumos + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; +#else + fp->f_offset = off; +#endif + + releasef(fd); + return (error); +} + +/* + * Determine approximately how large a zfs send stream will be -- the number + * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). + * + * innvl: { + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl: { + * "space" -> bytes of space (uint64) + * } + */ +static int +zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + dsl_pool_t *dp; + dsl_dataset_t *fromsnap = NULL; + dsl_dataset_t *tosnap; + int error; + char *fromname; + uint64_t space; + + error = dsl_pool_hold(snapname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (error); + } + } + + error = dmu_send_estimate(tosnap, fromsnap, &space); + fnvlist_add_uint64(outnvl, "space", space); + + if (fromsnap != NULL) + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (error); +} + + +static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; + +static void +zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + boolean_t log_history, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + vec->zvec_legacy_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_allow_log = log_history; + vec->zvec_pool_check = pool_check; +} + +/* + * See the block comment at the beginning of this file for details on + * each argument to this function. + */ +static void +zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, + boolean_t allow_log) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + /* if we are logging, the name must be valid */ + ASSERT(!allow_log || namecheck != NO_NAME); + + vec->zvec_name = name; + vec->zvec_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_pool_check = pool_check; + vec->zvec_smush_outnvlist = smush_outnvlist; + vec->zvec_allow_log = allow_log; +} + +static void +zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, boolean_t log_history, + zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + POOL_NAME, log_history, pool_check); +} + +static void +zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, pool_check); +} + +static void +zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, + POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + NO_NAME, B_FALSE, POOL_CHECK_NONE); +} + +static void +zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, + zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); +} + +static void +zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_dataset_read_secpolicy(ioc, func, + zfs_secpolicy_read); +} + +static void +zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_init(void) +{ + zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, + zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, + zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + + zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, + zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, + zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, + zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("create", ZFS_IOC_CREATE, + zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("clone", ZFS_IOC_CLONE, + zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, + zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("hold", ZFS_IOC_HOLD, + zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("release", ZFS_IOC_RELEASE, + zfs_ioc_release, zfs_secpolicy_release, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS, + zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + /* IOCTLS that use the legacy function signature */ + + zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, + zfs_ioc_pool_scan); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, + zfs_ioc_pool_upgrade); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, + zfs_ioc_vdev_add); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, + zfs_ioc_vdev_remove); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, + zfs_ioc_vdev_set_state); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, + zfs_ioc_vdev_attach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, + zfs_ioc_vdev_detach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, + zfs_ioc_vdev_setpath); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, + zfs_ioc_vdev_setfru); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, + zfs_ioc_pool_set_props); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, + zfs_ioc_vdev_split); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, + zfs_ioc_pool_reguid); + + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, + zfs_ioc_pool_configs, zfs_secpolicy_none); + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, + zfs_ioc_pool_tryimport, zfs_secpolicy_config); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, + zfs_ioc_inject_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, + zfs_ioc_clear_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, + zfs_ioc_inject_list_next, zfs_secpolicy_inject); + + /* + * pool destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_export + * does the logging of those commands. + */ + zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, + zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, + zfs_ioc_dsobj_to_dsname, + zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, + zfs_ioc_pool_get_history, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, + zfs_ioc_space_written); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, + zfs_ioc_objset_recvd_props); + zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, + zfs_ioc_next_obj); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, + zfs_ioc_get_fsacl); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, + zfs_ioc_objset_stats); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, + zfs_ioc_objset_zplprops); + zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, + zfs_ioc_dataset_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, + zfs_ioc_snapshot_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, + zfs_ioc_send_progress); + + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, + zfs_ioc_diff, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, + zfs_ioc_obj_to_stats, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, + zfs_ioc_obj_to_path, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, + zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, + zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, + zfs_ioc_send, zfs_secpolicy_send); + + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, + zfs_secpolicy_none); + zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, + zfs_secpolicy_destroy); + zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, + zfs_secpolicy_rollback); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, + zfs_secpolicy_rename); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, + zfs_secpolicy_recv); + zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, + zfs_secpolicy_promote); + zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, + zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, + zfs_secpolicy_set_fsacl); + + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, + zfs_secpolicy_share, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, + zfs_secpolicy_smb_acl, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, + zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, + zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} int pool_status_check(const char *name, zfs_ioc_namecheck_t type, @@ -5204,9 +5592,9 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type, error = spa_open(name, &spa, FTAG); if (error == 0) { if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) - error = EAGAIN; + error = SET_ERROR(EAGAIN); else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) - error = EROFS; + error = SET_ERROR(EROFS); spa_close(spa, FTAG); } return (error); @@ -5245,10 +5633,10 @@ zfs_ctldev_init(struct cdev *devp) minor = zfsdev_minor_alloc(); if (minor == 0) - return (ENXIO); + return (SET_ERROR(ENXIO)); if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) - return (EAGAIN); + return (SET_ERROR(EAGAIN)); devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close); @@ -5320,36 +5708,66 @@ zfsdev_close(void *data) } static int -zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, +zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag, struct thread *td) { zfs_cmd_t *zc; - uint_t vec; - int cflag, error, len; + uint_t vecnum; + int error, rc, len; +#ifdef illumos + minor_t minor = getminor(dev); +#else + zfs_iocparm_t *zc_iocparm; + int cflag, cmd, oldvecnum; + boolean_t newioc, compat; + cred_t *cr = td->td_ucred; +#endif + const zfs_ioc_vec_t *vec; + char *saved_poolname = NULL; + nvlist_t *innvl = NULL; cflag = ZFS_CMD_COMPAT_NONE; - len = IOCPARM_LEN(cmd); + compat = B_FALSE; + newioc = B_TRUE; + + len = IOCPARM_LEN(zcmd); + cmd = zcmd & 0xff; /* * Check if we are talking to supported older binaries * and translate zfs_cmd if necessary */ - if (len < sizeof(zfs_cmd_t)) - if (len == sizeof(zfs_cmd_v28_t)) { + if (len != sizeof(zfs_iocparm_t)) { + newioc = B_FALSE; + if (len == sizeof(zfs_cmd_t)) { + cflag = ZFS_CMD_COMPAT_LZC; + vecnum = cmd; + } else if (len == sizeof(zfs_cmd_deadman_t)) { + cflag = ZFS_CMD_COMPAT_DEADMAN; + compat = B_TRUE; + vecnum = cmd; + } else if (len == sizeof(zfs_cmd_v28_t)) { cflag = ZFS_CMD_COMPAT_V28; - vec = ZFS_IOC(cmd); + compat = B_TRUE; + vecnum = cmd; } else if (len == sizeof(zfs_cmd_v15_t)) { cflag = ZFS_CMD_COMPAT_V15; - vec = zfs_ioctl_v15_to_v28[ZFS_IOC(cmd)]; + compat = B_TRUE; + vecnum = zfs_ioctl_v15_to_v28[cmd]; } else return (EINVAL); - else - vec = ZFS_IOC(cmd); + } else + vecnum = cmd; - if (cflag != ZFS_CMD_COMPAT_NONE) { - if (vec == ZFS_IOC_COMPAT_PASS) +#ifdef illumos + vecnum = cmd - ZFS_IOC_FIRST; + ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); +#endif + + if (compat) { + if (vecnum == ZFS_IOC_COMPAT_PASS) return (0); - else if (vec == ZFS_IOC_COMPAT_FAIL) + else if (vecnum == ZFS_IOC_COMPAT_FAIL) return (ENOTSUP); } @@ -5358,65 +5776,203 @@ zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, * for the zfs_cmd_t request. Bail out if not so we * will not access undefined memory region. */ - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) - return (EINVAL); + if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + return (SET_ERROR(EINVAL)); + vec = &zfs_ioc_vec[vecnum]; + +#ifdef illumos + zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP); + bzero(zc, sizeof(zfs_cmd_t)); - if (cflag != ZFS_CMD_COMPAT_NONE) { + error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); + if (error != 0) { + error = SET_ERROR(EFAULT); + goto out; + } +#else /* !illumos */ + /* + * We don't alloc/free zc only if talking to library ioctl version 2 + */ + if (cflag != ZFS_CMD_COMPAT_LZC) { zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP); bzero(zc, sizeof(zfs_cmd_t)); - zfs_cmd_compat_get(zc, addr, cflag); - zfs_ioctl_compat_pre(zc, &vec, cflag); } else { - zc = (void *)addr; + zc = (void *)arg; + error = 0; } - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, td->td_ucred); + if (newioc) { + zc_iocparm = (void *)arg; + if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) { + error = SET_ERROR(EFAULT); + goto out; + } + error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd, zc, + sizeof(zfs_cmd_t), flag); + if (error != 0) { + error = SET_ERROR(EFAULT); + goto out; + } + } + + if (compat) { + zfs_cmd_compat_get(zc, arg, cflag); + oldvecnum = vecnum; + error = zfs_ioctl_compat_pre(zc, &vecnum, cflag); + if (error != 0) + goto out; + if (oldvecnum != vecnum) + vec = &zfs_ioc_vec[vecnum]; + } +#endif /* !illumos */ + + zc->zc_iflags = flag & FKIOCTL; + if (zc->zc_nvlist_src_size != 0) { + error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &innvl); + if (error != 0) + goto out; + } + + /* rewrite innvl for backwards compatibility */ + if (compat) + innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag); /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. */ - if (error == 0) { - zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; - zc->zc_iflags = flag & FKIOCTL; - switch (zfs_ioc_vec[vec].zvec_namecheck) { - case POOL_NAME: - if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; - else - error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + switch (vec->zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = SET_ERROR(EINVAL); + else + error = pool_status_check(zc->zc_name, + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case DATASET_NAME: - if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; - else - error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = SET_ERROR(EINVAL); + else + error = pool_status_check(zc->zc_name, + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case NO_NAME: - break; + case NO_NAME: + break; + } + + if (error == 0 && !(flag & FKIOCTL)) + error = vec->zvec_secpolicy(zc, innvl, cr); + + if (error != 0) + goto out; + + /* legacy ioctls can modify zc_name */ + len = strcspn(zc->zc_name, "/@") + 1; + saved_poolname = kmem_alloc(len, KM_SLEEP); + (void) strlcpy(saved_poolname, zc->zc_name, len); + + if (vec->zvec_func != NULL) { + nvlist_t *outnvl; + int puterror = 0; + spa_t *spa; + nvlist_t *lognv = NULL; + + ASSERT(vec->zvec_legacy_func == NULL); + + /* + * Add the innvl to the lognv before calling the func, + * in case the func changes the innvl. + */ + if (vec->zvec_allow_log) { + lognv = fnvlist_alloc(); + fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, + vec->zvec_name); + if (!nvlist_empty(innvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, + innvl); + } + } + + outnvl = fnvlist_alloc(); + error = vec->zvec_func(zc->zc_name, innvl, outnvl); + + if (error == 0 && vec->zvec_allow_log && + spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (!nvlist_empty(outnvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, + outnvl); + } + (void) spa_history_log_nvl(spa, lognv); + spa_close(spa, FTAG); + } + fnvlist_free(lognv); + + /* rewrite outnvl for backwards compatibility */ + if (cflag != ZFS_CMD_COMPAT_NONE && cflag != ZFS_CMD_COMPAT_LZC) + outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum, + cflag); + + if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { + int smusherror = 0; + if (vec->zvec_smush_outnvlist) { + smusherror = nvlist_smush(outnvl, + zc->zc_nvlist_dst_size); + } + if (smusherror == 0) + puterror = put_nvlist(zc, outnvl); } + + if (puterror != 0) + error = puterror; + + nvlist_free(outnvl); + } else { + error = vec->zvec_legacy_func(zc); } - if (error == 0) - error = zfs_ioc_vec[vec].zvec_func(zc); +out: + nvlist_free(innvl); - if (error == 0) { - if (zfs_ioc_vec[vec].zvec_his_log) - zfs_log_history(zc); + if (compat) { + zfs_ioctl_compat_post(zc, cmd, cflag); + zfs_cmd_compat_put(zc, arg, vecnum, cflag); } - if (cflag != ZFS_CMD_COMPAT_NONE) { - zfs_ioctl_compat_post(zc, ZFS_IOC(cmd), cflag); - zfs_cmd_compat_put(zc, addr, cflag); - kmem_free(zc, sizeof(zfs_cmd_t)); +#ifdef illumos + rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); + if (error == 0 && rc != 0) + error = SET_ERROR(EFAULT); +#else + if (newioc) { + rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd, + sizeof (zfs_cmd_t), flag); + if (error == 0 && rc != 0) + error = SET_ERROR(EFAULT); + } +#endif + if (error == 0 && vec->zvec_allow_log) { + char *s = tsd_get(zfs_allow_log_key); + if (s != NULL) + strfree(s); + (void) tsd_set(zfs_allow_log_key, saved_poolname); + } else { + if (saved_poolname != NULL) + strfree(saved_poolname); } +#ifdef illumos + kmem_free(zc, sizeof (zfs_cmd_t)); +#else + /* + * We don't alloc/free zc only if talking to library ioctl version 2 + */ + if (cflag != ZFS_CMD_COMPAT_LZC) + kmem_free(zc, sizeof (zfs_cmd_t)); +#endif return (error); } @@ -5540,6 +6096,13 @@ static struct cdevsw zfs_cdevsw = { .d_name = ZFS_DEV_NAME }; +static void +zfs_allow_log_destroy(void *arg) +{ + char *poolname = arg; + strfree(poolname); +} + static void zfsdev_init(void) { @@ -5557,9 +6120,6 @@ zfsdev_fini(void) static struct root_hold_token *zfs_root_token; struct proc *zfsproc; -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; - #ifdef sun int _init(void) @@ -5569,6 +6129,7 @@ _init(void) spa_init(FREAD | FWRITE); zfs_init(); zvol_init(); + zfs_ioctl_init(); if ((error = mod_install(&modlinkage)) != 0) { zvol_fini(); @@ -5578,7 +6139,8 @@ _init(void) } tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, NULL); + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); + tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); error = ldi_ident_from_mod(&modlinkage, &zfs_li); ASSERT(error == 0); @@ -5593,7 +6155,7 @@ _fini(void) int error; if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled) - return (EBUSY); + return (SET_ERROR(EBUSY)); if ((error = mod_remove(&modlinkage)) != 0) return (error); @@ -5637,9 +6199,11 @@ zfs_modevent(module_t mod, int type, void *unused __unused) spa_init(FREAD | FWRITE); zfs_init(); zvol_init(); + zfs_ioctl_init(); tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, NULL); + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); + tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n"); root_mount_rel(zfs_root_token); @@ -5660,6 +6224,7 @@ zfs_modevent(module_t mod, int type, void *unused __unused) tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); + tsd_destroy(&zfs_allow_log_key); mutex_destroy(&zfs_share_lock); break; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c index ca0acfd32..1acdb8d40 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -105,7 +106,7 @@ zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) { *zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV); if (*zo == NULL) - return (EBADF); + return (SET_ERROR(EBADF)); return (0); } @@ -126,7 +127,7 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp) fp = getf(fd); if (fp == NULL) - return (EBADF); + return (SET_ERROR(EBADF)); tmpfp = curthread->td_fpop; curthread->td_fpop = fp; @@ -216,7 +217,7 @@ zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) kmem_free(ap, sizeof (zfs_onexit_action_node_t)); } else { mutex_exit(&zo->zo_lock); - error = ENOENT; + error = SET_ERROR(ENOENT); } return (error); @@ -245,7 +246,7 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) if (ap != NULL) *data = ap->za_data; else - error = ENOENT; + error = SET_ERROR(ENOENT); mutex_exit(&zo->zo_lock); return (error); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c index ebea17a3a..a25f2943a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -73,7 +74,7 @@ zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, static int zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap) { - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } static void @@ -396,7 +397,7 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, #endif break; default: - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } bail: @@ -528,7 +529,7 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &xva.xva_vattr, link /*,vflg*/); break; default: - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } VOP_UNLOCK(ZTOV(dzp), 0); @@ -584,7 +585,7 @@ zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) error = VOP_RMDIR(ZTOV(dzp), vp, &cn /*,vflg*/); break; default: - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); } vput(vp); VOP_UNLOCK(ZTOV(dzp), 0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index 74a505ecc..bd17abf3b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -395,8 +396,10 @@ zfs_register_callbacks(vfs_t *vfsp) boolean_t do_setuid = B_FALSE; boolean_t exec = B_FALSE; boolean_t do_exec = B_FALSE; +#ifdef illumos boolean_t devices = B_FALSE; boolean_t do_devices = B_FALSE; +#endif boolean_t xattr = B_FALSE; boolean_t do_xattr = B_FALSE; boolean_t atime = B_FALSE; @@ -492,25 +495,33 @@ zfs_register_callbacks(vfs_t *vfsp) * overboard... */ ds = dmu_objset_ds(os); - error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); + error = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); + error = error ? error : dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "xattr", xattr_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "recordsize", blksz_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); +#ifdef illumos error = error ? error : dsl_prop_register(ds, - "readonly", readonly_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); +#endif error = error ? error : dsl_prop_register(ds, - "setuid", setuid_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "exec", exec_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "snapdir", snapdir_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "aclmode", acl_mode_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, - "aclinherit", acl_inherit_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, + zfsvfs); error = error ? error : dsl_prop_register(ds, - "vscan", vscan_changed_cb, zfsvfs); + zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; @@ -538,32 +549,42 @@ unregister: * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ - (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, - zfsvfs); - (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), + atime_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), + xattr_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), + blksz_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), + readonly_changed_cb, zfsvfs); +#ifdef illumos + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), + devices_changed_cb, zfsvfs); +#endif + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID), + setuid_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC), + exec_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), + snapdir_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE), + acl_mode_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), + acl_inherit_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), + vscan_changed_cb, zfsvfs); return (error); - } static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { - int error = 0; - /* * Is it a valid type of object to track? */ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) - return (ENOENT); + return (SET_ERROR(ENOENT)); /* * If we have a NULL data pointer @@ -572,7 +593,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, * use the same ids */ if (data == NULL) - return (EEXIST); + return (SET_ERROR(EEXIST)); if (bonustype == DMU_OT_ZNODE) { znode_phys_t *znp = data; @@ -615,7 +636,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, *groupp = BSWAP_64(*groupp); } } - return (error); + return (0); } static void @@ -662,7 +683,7 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, uint64_t obj; if (!dmu_objset_userspace_present(zfsvfs->z_os)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); obj = zfs_userquota_prop_to_obj(zfsvfs, type); if (obj == 0) { @@ -706,7 +727,7 @@ id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, if (domain && domain[0]) { domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); if (domainid == -1) - return (ENOENT); + return (SET_ERROR(ENOENT)); } fuid = FUID_ENCODE(domainid, rid); (void) sprintf(buf, "%llx", (longlong_t)fuid); @@ -724,7 +745,7 @@ zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, *valp = 0; if (!dmu_objset_userspace_present(zfsvfs->z_os)) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); obj = zfs_userquota_prop_to_obj(zfsvfs, type); if (obj == 0) @@ -751,10 +772,10 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, boolean_t fuid_dirtied; if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : &zfsvfs->z_groupquota_obj; @@ -882,7 +903,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) "on a version %lld pool\n. Pool must be upgraded to mount " "this file system.", (u_longlong_t)zfsvfs->z_version, (u_longlong_t)spa_version(dmu_objset_spa(os))); - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); goto out; } if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) @@ -967,7 +988,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); - rrw_init(&zfsvfs->z_teardown_lock); + rrw_init(&zfsvfs->z_teardown_lock, B_FALSE); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) @@ -1131,6 +1152,18 @@ zfs_domount(vfs_t *vfsp, char *osname) return (error); zfsvfs->z_vfs = vfsp; +#ifdef illumos + /* Initialize the generic filesystem structure. */ + vfsp->vfs_bcount = 0; + vfsp->vfs_data = NULL; + + if (zfs_create_unique_device(&mount_dev) == -1) { + error = SET_ERROR(ENODEV); + goto out; + } + ASSERT(vfs_devismounted(mount_dev) == 0); +#endif + if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL)) goto out; @@ -1262,7 +1295,7 @@ str_to_uint64(char *str, uint64_t *objnum) while (*str) { if (*str < '0' || *str > '9') - return (EINVAL); + return (SET_ERROR(EINVAL)); num = num*10 + *str++ - '0'; } @@ -1284,7 +1317,7 @@ zfs_parse_bootfs(char *bpath, char *outpath) int error; if (*bpath == 0 || *bpath == '/') - return (EINVAL); + return (SET_ERROR(EINVAL)); (void) strcpy(outpath, bpath); @@ -1329,10 +1362,10 @@ zfs_check_global_label(const char *dsname, const char *hexsl) if (dsl_prop_get_integer(dsname, zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) - return (EACCES); + return (SET_ERROR(EACCES)); return (rdonly ? 0 : EACCES); } - return (EACCES); + return (SET_ERROR(EACCES)); } /* @@ -1364,7 +1397,7 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname) error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error) - return (EACCES); + return (SET_ERROR(EACCES)); /* * If labeling is NOT enabled, then disallow the mount of datasets @@ -1374,7 +1407,7 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname) if (!is_system_labeled()) { if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) return (0); - return (EACCES); + return (SET_ERROR(EACCES)); } /* @@ -1391,7 +1424,7 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname) if (dsl_prop_get_integer(osname, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) - return (EACCES); + return (SET_ERROR(EACCES)); if (!zoned) return (zfs_check_global_label(osname, ds_hexsl)); else @@ -1415,8 +1448,9 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname) char *str = NULL; if (l_to_str_internal(mnt_sl, &str) == 0 && - dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), - ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) + dsl_prop_set_string(osname, + zfs_prop_to_name(ZFS_PROP_MLSLABEL), + ZPROP_SRC_LOCAL, str) == 0) retv = 0; if (str != NULL) kmem_free(str, strlen(str) + 1); @@ -1462,7 +1496,7 @@ zfs_mountroot(vfs_t *vfsp, enum whymountroot why) */ if (why == ROOT_INIT) { if (zfsrootdone++) - return (EBUSY); + return (SET_ERROR(EBUSY)); /* * the process of doing a spa_load will require the * clock to be set before we could (for example) do @@ -1474,7 +1508,7 @@ zfs_mountroot(vfs_t *vfsp, enum whymountroot why) if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { cmn_err(CE_NOTE, "spa_get_bootfs: can not get " "bootfs name"); - return (EINVAL); + return (SET_ERROR(EINVAL)); } zfs_devid = spa_get_bootprop("diskdevid"); error = spa_import_rootpool(rootfs.bo_name, zfs_devid); @@ -1543,7 +1577,7 @@ out: * if "why" is equal to anything else other than ROOT_INIT, * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. */ - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } #endif /* OPENSOLARIS_MOUNTROOT */ @@ -1577,11 +1611,33 @@ zfs_mount(vfs_t *vfsp) int error = 0; int canwrite; +#ifdef illumos + if (mvp->v_type != VDIR) + return (SET_ERROR(ENOTDIR)); + + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_REMOUNT) == 0 && + (uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + return (SET_ERROR(EBUSY)); + } + mutex_exit(&mvp->v_lock); + + /* + * ZFS does not support passing unparsed data in via MS_DATA. + * Users should use the MS_OPTIONSTR interface; this means + * that all option parsing is already done and the options struct + * can be interrogated. + */ + if ((uap->flags & MS_DATA) && uap->datalen > 0) +#else if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS)) - return (EPERM); + return (SET_ERROR(EPERM)); if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) - return (EINVAL); + return (SET_ERROR(EINVAL)); +#endif /* ! illumos */ /* * If full-owner-access is enabled and delegated administration is @@ -1636,7 +1692,7 @@ zfs_mount(vfs_t *vfsp) */ if (!INGLOBALZONE(curthread) && (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { - error = EPERM; + error = SET_ERROR(EPERM); goto out; } @@ -1827,7 +1883,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); - return (EIO); + return (SET_ERROR(EIO)); } /* @@ -1876,7 +1932,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); - (void) dmu_objset_evict_dbufs(zfsvfs->z_os); + dmu_objset_evict_dbufs(zfsvfs->z_os); return (0); } @@ -1960,11 +2016,11 @@ zfs_umount(vfs_t *vfsp, int fflag) */ if (zfsvfs->z_ctldir == NULL) { if (vfsp->vfs_count > 1) - return (EBUSY); + return (SET_ERROR(EBUSY)); } else { if (vfsp->vfs_count > 2 || zfsvfs->z_ctldir->v_count > 1) - return (EBUSY); + return (SET_ERROR(EBUSY)); } } @@ -2093,7 +2149,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); if (err) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); } @@ -2107,7 +2163,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); } else { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -2152,7 +2208,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); VN_RELE(ZTOV(zp)); ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } *vpp = ZTOV(zp); @@ -2363,14 +2419,14 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) dmu_tx_t *tx; if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (newvers < zfsvfs->z_version) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (zfs_spa_version_map(newvers) > spa_version(dmu_objset_spa(zfsvfs->z_os))) - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); @@ -2409,9 +2465,8 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) sa_register_update_callback(os, zfs_sa_upgrade); } - spa_history_log_internal(LOG_DS_UPGRADE, - dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", - zfsvfs->z_version, newvers, dmu_objset_id(os)); + spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, + "from %llu to %llu", zfsvfs->z_version, newvers); dmu_tx_commit(tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 7f75ce308..b57f1619c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ @@ -177,7 +177,7 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && ((flag & FAPPEND) == 0)) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && @@ -185,7 +185,7 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { if (fs_vscan(*vpp, cr, 0) != 0) { ZFS_EXIT(zfsvfs); - return (EACCES); + return (SET_ERROR(EACCES)); } } @@ -242,7 +242,7 @@ zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) file_sz = zp->z_size; if (noff >= file_sz) { - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (cmd == _FIO_SEEK_HOLE) @@ -261,7 +261,7 @@ zfs_holey(vnode_t *vp, u_long cmd, offset_t *off) *off = file_sz; return (0); } - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (noff < *off) @@ -296,7 +296,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, case _FIO_SEEK_HOLE: #ifdef sun if (ddi_copyin((void *)data, &off, sizeof (off), flag)) - return (EFAULT); + return (SET_ERROR(EFAULT)); #else off = *(offset_t *)data; #endif @@ -312,13 +312,13 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, return (error); #ifdef sun if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) - return (EFAULT); + return (SET_ERROR(EFAULT)); #else *(offset_t *)data = off; #endif return (0); } - return (ENOTTY); + return (SET_ERROR(ENOTTY)); } static vm_page_t @@ -653,7 +653,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zfsvfs); - return (EACCES); + return (SET_ERROR(EACCES)); } /* @@ -661,7 +661,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) */ if (uio->uio_loffset < (offset_t)0) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -754,7 +754,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) - error = EIO; + error = SET_ERROR(EIO); break; } @@ -842,7 +842,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && (uio->uio_loffset < zp->z_size))) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } zilog = zfsvfs->z_log; @@ -853,7 +853,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -916,7 +916,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (woff >= limit) { zfs_range_unlock(rl); ZFS_EXIT(zfsvfs); - return (EFBIG); + return (SET_ERROR(EFBIG)); } if ((woff + n) > limit || woff > (limit - n)) @@ -940,7 +940,7 @@ again: zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { if (abuf != NULL) dmu_return_arcbuf(abuf); - error = EDQUOT; + error = SET_ERROR(EDQUOT); break; } @@ -1208,7 +1208,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) * Nothing to do if the file has been removed */ if (zfs_zget(zfsvfs, object, &zp) != 0) - return (ENOENT); + return (SET_ERROR(ENOENT)); if (zp->z_unlinked) { /* * Release the vnode asynchronously as we currently have the @@ -1216,7 +1216,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) */ VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); - return (ENOENT); + return (SET_ERROR(ENOENT)); } zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); @@ -1234,7 +1234,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { - error = ENOENT; + error = SET_ERROR(ENOENT); } else { error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); @@ -1261,10 +1261,10 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) - error = ENOENT; + error = SET_ERROR(ENOENT); #ifdef DEBUG if (zil_fault_io) { - error = EIO; + error = SET_ERROR(EIO); zil_fault_io = 0; } #endif @@ -1345,7 +1345,7 @@ specvp_check(vnode_t **vpp, cred_t *cr) svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); VN_RELE(*vpp); if (svp == NULL) - error = ENOSYS; + error = SET_ERROR(ENOSYS); *vpp = svp; } return (error); @@ -1389,9 +1389,9 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { if (dvp->v_type != VDIR) { - return (ENOTDIR); + return (SET_ERROR(ENOTDIR)); } else if (zdp->z_sa_hdl == NULL) { - return (EIO); + return (SET_ERROR(EIO)); } if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { @@ -1413,7 +1413,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, } if (tvp == DNLC_NO_VNODE) { VN_RELE(tvp); - return (ENOENT); + return (SET_ERROR(ENOENT)); } else { *vpp = tvp; return (specvp_check(vpp, cr)); @@ -1436,7 +1436,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, */ if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } #endif @@ -1446,7 +1446,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, */ if (zdp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { @@ -1470,7 +1470,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, if (dvp->v_type != VDIR) { ZFS_EXIT(zfsvfs); - return (ENOTDIR); + return (SET_ERROR(ENOTDIR)); } /* @@ -1485,7 +1485,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); @@ -1609,7 +1609,7 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, if (zfsvfs->z_use_fuids == B_FALSE && (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); @@ -1619,7 +1619,7 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode, if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } if (vap->va_mask & AT_XVATTR) { @@ -1656,7 +1656,7 @@ top: if (have_acl) zfs_acl_ids_free(&acl_ids); if (strcmp(name, "..") == 0) - error = EISDIR; + error = SET_ERROR(EISDIR); ZFS_EXIT(zfsvfs); return (error); } @@ -1684,7 +1684,7 @@ top: (vap->va_type != VREG)) { if (have_acl) zfs_acl_ids_free(&acl_ids); - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -1695,7 +1695,7 @@ top: if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { zfs_acl_ids_free(&acl_ids); - error = EDQUOT; + error = SET_ERROR(EDQUOT); goto out; } @@ -1754,14 +1754,14 @@ top: * Can't truncate an existing file if in exclusive mode. */ if (excl == EXCL) { - error = EEXIST; + error = SET_ERROR(EEXIST); goto out; } /* * Can't open a directory for writing. */ if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { - error = EISDIR; + error = SET_ERROR(EISDIR); goto out; } /* @@ -1884,7 +1884,7 @@ top: * Need to use rmdir for removing directories. */ if (vp->v_type == VDIR) { - error = EPERM; + error = SET_ERROR(EPERM); goto out; } @@ -2099,7 +2099,7 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, if (zfsvfs->z_use_fuids == B_FALSE && (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); @@ -2107,13 +2107,13 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, if (dzp->z_pflags & ZFS_XATTR) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (zfsvfs->z_utf8 && u8_validate(dirname, strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; @@ -2159,7 +2159,7 @@ top: zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); - return (EDQUOT); + return (SET_ERROR(EDQUOT)); } /* @@ -2285,12 +2285,12 @@ top: } if (vp->v_type != VDIR) { - error = ENOTDIR; + error = SET_ERROR(ENOTDIR); goto out; } if (vp == cwd) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto out; } @@ -2436,7 +2436,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon */ if (uio->uio_iov->iov_len <= 0) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -2550,7 +2550,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon "entry, obj = %lld, offset = %lld\n", (u_longlong_t)zp->z_id, (u_longlong_t)offset); - error = ENXIO; + error = SET_ERROR(ENXIO); goto update; } @@ -2599,7 +2599,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, u_lon * Did we manage to fit anything in the buffer? */ if (!outcount) { - error = EINVAL; + error = SET_ERROR(EINVAL); goto update; } break; @@ -2985,7 +2985,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, return (0); if (mask & AT_NOSET) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); @@ -3002,17 +3002,17 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || (mask & AT_XVATTR))) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (mask & AT_SIZE && vp->v_type == VDIR) { ZFS_EXIT(zfsvfs); - return (EISDIR); + return (SET_ERROR(EISDIR)); } if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -3030,12 +3030,12 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } /* @@ -3048,7 +3048,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { ZFS_EXIT(zfsvfs); - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); } } @@ -3059,7 +3059,7 @@ top: /* Can this be moved to before the top label? */ if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { ZFS_EXIT(zfsvfs); - return (EROFS); + return (SET_ERROR(EROFS)); } /* @@ -3217,7 +3217,7 @@ top: if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { mutex_exit(&zp->z_lock); ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if (need_policy == FALSE && @@ -3304,7 +3304,7 @@ top: zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { if (attrzp) VN_RELE(ZTOV(attrzp)); - err = EDQUOT; + err = SET_ERROR(EDQUOT); goto out2; } } @@ -3316,7 +3316,7 @@ top: zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { if (attrzp) VN_RELE(ZTOV(attrzp)); - err = EDQUOT; + err = SET_ERROR(EDQUOT); goto out2; } } @@ -3330,7 +3330,7 @@ top: if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { - err = EPERM; + err = SET_ERROR(EPERM); goto out; } @@ -3658,7 +3658,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) *zlpp = zl; if (oidp == szp->z_id) /* We're a descendant of szp */ - return (EINVAL); + return (SET_ERROR(EINVAL)); if (oidp == rootid) /* We've hit the top */ return (0); @@ -3726,7 +3726,7 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { ZFS_EXIT(zfsvfs); - return (EXDEV); + return (SET_ERROR(EXDEV)); } tdzp = VTOZ(tdvp); @@ -3734,7 +3734,7 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, if (zfsvfs->z_utf8 && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) @@ -3752,7 +3752,7 @@ top: */ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* @@ -3857,7 +3857,7 @@ top: * not the case for FreeBSD, so we check for "." explicitly. */ if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0) - serr = EINVAL; + serr = SET_ERROR(EINVAL); ZFS_EXIT(zfsvfs); return (serr); } @@ -3869,7 +3869,7 @@ top: rw_exit(&sdzp->z_name_lock); if (strcmp(tnm, "..") == 0) - terr = EINVAL; + terr = SET_ERROR(EINVAL); ZFS_EXIT(zfsvfs); return (terr); } @@ -3902,12 +3902,12 @@ top: */ if (ZTOV(szp)->v_type == VDIR) { if (ZTOV(tzp)->v_type != VDIR) { - error = ENOTDIR; + error = SET_ERROR(ENOTDIR); goto out; } } else { if (ZTOV(tzp)->v_type == VDIR) { - error = EISDIR; + error = SET_ERROR(EISDIR); goto out; } } @@ -4089,14 +4089,14 @@ zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link, if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zflg |= ZCILOOK; if (len > MAXPATHLEN) { ZFS_EXIT(zfsvfs); - return (ENAMETOOLONG); + return (SET_ERROR(ENAMETOOLONG)); } if ((error = zfs_acl_ids_create(dzp, 0, @@ -4126,7 +4126,7 @@ top: zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); ZFS_EXIT(zfsvfs); - return (EDQUOT); + return (SET_ERROR(EDQUOT)); } tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; @@ -4288,12 +4288,12 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, */ if (svp->v_type == VDIR) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { ZFS_EXIT(zfsvfs); - return (EXDEV); + return (SET_ERROR(EXDEV)); } szp = VTOZ(svp); @@ -4308,13 +4308,13 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, } if (parent == zfsvfs->z_shares_dir) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); - return (EILSEQ); + return (SET_ERROR(EILSEQ)); } if (flags & FIGNORECASE) zf |= ZCILOOK; @@ -4327,14 +4327,14 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, */ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { @@ -4475,7 +4475,7 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { - err = EDQUOT; + err = SET_ERROR(EDQUOT); goto out; } top: @@ -4727,7 +4727,7 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, */ if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { ZFS_EXIT(zfsvfs); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } ZFS_EXIT(zfsvfs); return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); @@ -4791,7 +4791,7 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, pvn_read_done(pp, B_ERROR); /* convert checksum errors into IO errors */ if (err == ECKSUM) - err = EIO; + err = SET_ERROR(EIO); return (err); } cur_pp = cur_pp->p_next; @@ -4941,28 +4941,28 @@ zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, if ((prot & PROT_WRITE) && (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { ZFS_EXIT(zfsvfs); - return (EPERM); + return (SET_ERROR(EPERM)); } if ((prot & (PROT_READ | PROT_EXEC)) && (zp->z_pflags & ZFS_AV_QUARANTINED)) { ZFS_EXIT(zfsvfs); - return (EACCES); + return (SET_ERROR(EACCES)); } if (vp->v_flag & VNOMAP) { ZFS_EXIT(zfsvfs); - return (ENOSYS); + return (SET_ERROR(ENOSYS)); } if (off < 0 || len > MAXOFFSET_T - off) { ZFS_EXIT(zfsvfs); - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (vp->v_type != VREG) { ZFS_EXIT(zfsvfs); - return (ENODEV); + return (SET_ERROR(ENODEV)); } /* @@ -4970,7 +4970,7 @@ zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, */ if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { ZFS_EXIT(zfsvfs); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } as_rangelock(as); @@ -5085,7 +5085,7 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, if (cmd != F_FREESP) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } if (error = convoff(vp, bfp, 0, offset)) { @@ -5095,7 +5095,7 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, if (bfp->l_len < 0) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } off = bfp->l_start; @@ -5135,7 +5135,16 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) gen = (uint32_t)gen64; size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; + +#ifdef illumos + if (fidp->fid_len < size) { + fidp->fid_len = size; + ZFS_EXIT(zfsvfs); + return (SET_ERROR(ENOSPC)); + } +#else fidp->fid_len = size; +#endif zfid = (zfid_short_t *)fidp; @@ -5320,7 +5329,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, int preamble, postamble; if (xuio->xu_type != UIOTYPE_ZEROCOPY) - return (EINVAL); + return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); @@ -5333,7 +5342,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, blksz = max_blksz; if (size < blksz || zp->z_blksz != blksz) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } /* * Caller requests buffers for write before knowing where the @@ -5401,7 +5410,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, /* avoid potential complexity of dealing with it */ if (blksz > max_blksz) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } maxsize = zp->z_size - uio->uio_loffset; @@ -5410,12 +5419,12 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, if (size < blksz || vn_has_cached_data(vp)) { ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } break; default: ZFS_EXIT(zfsvfs); - return (EINVAL); + return (SET_ERROR(EINVAL)); } uio->uio_extflg = UIO_XUIO; @@ -5462,13 +5471,13 @@ static int zfs_isdir(); static int zfs_inval() { - return (EINVAL); + return (SET_ERROR(EINVAL)); } static int zfs_isdir() { - return (EISDIR); + return (SET_ERROR(EISDIR)); } /* * Directory vnode operations template diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 18e54d24a..99ea2c08c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ @@ -1173,8 +1173,10 @@ again: doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); +#ifdef __FreeBSD__ getnewvnode_drop_reserve(); - return (EINVAL); +#endif + return (SET_ERROR(EINVAL)); } hdl = dmu_buf_get_user(db); @@ -1193,7 +1195,7 @@ again: mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); if (zp->z_unlinked) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { vp = ZTOV(zp); *zpp = zp; @@ -1245,7 +1247,7 @@ again: zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, doi.doi_bonus_type, NULL); if (zp == NULL) { - err = ENOENT; + err = SET_ERROR(ENOENT); } else { *zpp = zp; } @@ -1304,7 +1306,7 @@ zfs_rezget(znode_t *zp) doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - return (EINVAL); + return (SET_ERROR(EINVAL)); } zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); @@ -1331,7 +1333,7 @@ zfs_rezget(znode_t *zp) if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - return (EIO); + return (SET_ERROR(EIO)); } zp->z_mode = mode; @@ -1339,7 +1341,7 @@ zfs_rezget(znode_t *zp) if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - return (EIO); + return (SET_ERROR(EIO)); } /* @@ -2007,7 +2009,7 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)) { sa_buf_rele(*db, tag); - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); @@ -2076,7 +2078,7 @@ zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, * Otherwise the parent must be a directory. */ if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (EINVAL); + return (SET_ERROR(EINVAL)); *pobjp = parent; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c index 4898d57b4..3120b7be0 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -149,7 +149,7 @@ zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) avl_index_t where; if (avl_find(t, dva, &where) != NULL) - return (EEXIST); + return (SET_ERROR(EEXIST)); zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP); zn->zn_dva = *dva; @@ -220,7 +220,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { - error = ECKSUM; + error = SET_ERROR(ECKSUM); } else { bcopy(lr, dst, len); *end = (char *)dst + len; @@ -234,7 +234,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || (zilc->zc_nused > (size - sizeof (*zilc)))) { - error = ECKSUM; + error = SET_ERROR(ECKSUM); } else { bcopy(lr, dst, zilc->zc_nused); *end = (char *)dst + zilc->zc_nused; @@ -242,7 +242,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, } } - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + VERIFY(arc_buf_remove_ref(abuf, &abuf)); } return (error); @@ -339,7 +339,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, break; error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); - if (error) + if (error != 0) break; for (lrp = lrbuf; lrp < end; lrp += reclen) { @@ -474,7 +474,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg) if (dsl_dataset_is_snapshot(ds)) panic("dirtying snapshot!"); - if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) { + if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, zilog); } @@ -633,8 +633,8 @@ zil_claim(const char *osname, void *txarg) objset_t *os; int error; - error = dmu_objset_hold(osname, FTAG, &os); - if (error) { + error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os); + if (error != 0) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } @@ -647,7 +647,7 @@ zil_claim(const char *osname, void *txarg) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); return (0); } @@ -672,7 +672,7 @@ zil_claim(const char *osname, void *txarg) } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); return (0); } @@ -692,7 +692,7 @@ zil_check_log_chain(const char *osname, void *tx) ASSERT(tx == NULL); error = dmu_objset_hold(osname, FTAG, &os); - if (error) { + if (error != 0) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } @@ -980,7 +980,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) /* pass the old blkptr in order to spread log blocks across devs */ error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, USE_SLOG(zilog)); - if (!error) { + if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; @@ -1091,7 +1091,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) txg_wait_synced(zilog->zl_dmu_pool, txg); return (lwb); } - if (error) { + if (error != 0) { ASSERT(error == ENOENT || error == EEXIST || error == EALREADY); return (lwb); @@ -1715,6 +1715,9 @@ zil_free(zilog_t *zilog) { zilog->zl_stop_sync = 1; + ASSERT0(zilog->zl_suspend); + ASSERT0(zilog->zl_suspending); + ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); @@ -1810,32 +1813,100 @@ zil_close(zilog_t *zilog) mutex_exit(&zilog->zl_lock); } +static char *suspend_tag = "zil suspending"; + /* * Suspend an intent log. While in suspended mode, we still honor * synchronous semantics, but we rely on txg_wait_synced() to do it. - * We suspend the log briefly when taking a snapshot so that the snapshot - * contains all the data it's supposed to, and has an empty intent log. + * On old version pools, we suspend the log briefly when taking a + * snapshot so that it will have an empty intent log. + * + * Long holds are not really intended to be used the way we do here -- + * held for such a short time. A concurrent caller of dsl_dataset_long_held() + * could fail. Therefore we take pains to only put a long hold if it is + * actually necessary. Fortunately, it will only be necessary if the + * objset is currently mounted (or the ZVOL equivalent). In that case it + * will already have a long hold, so we are not really making things any worse. + * + * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or + * zvol_state_t), and use their mechanism to prevent their hold from being + * dropped (e.g. VFS_HOLD()). However, that would be even more pain for + * very little gain. + * + * if cookiep == NULL, this does both the suspend & resume. + * Otherwise, it returns with the dataset "long held", and the cookie + * should be passed into zil_resume(). */ int -zil_suspend(zilog_t *zilog) +zil_suspend(const char *osname, void **cookiep) { - const zil_header_t *zh = zilog->zl_header; + objset_t *os; + zilog_t *zilog; + const zil_header_t *zh; + int error; + + error = dmu_objset_hold(osname, suspend_tag, &os); + if (error != 0) + return (error); + zilog = dmu_objset_zil(os); mutex_enter(&zilog->zl_lock); + zh = zilog->zl_header; + if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */ mutex_exit(&zilog->zl_lock); - return (EBUSY); + dmu_objset_rele(os, suspend_tag); + return (SET_ERROR(EBUSY)); + } + + /* + * Don't put a long hold in the cases where we can avoid it. This + * is when there is no cookie so we are doing a suspend & resume + * (i.e. called from zil_vdev_offline()), and there's nothing to do + * for the suspend because it's already suspended, or there's no ZIL. + */ + if (cookiep == NULL && !zilog->zl_suspending && + (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) { + mutex_exit(&zilog->zl_lock); + dmu_objset_rele(os, suspend_tag); + return (0); } - if (zilog->zl_suspend++ != 0) { + + dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag); + dsl_pool_rele(dmu_objset_pool(os), suspend_tag); + + zilog->zl_suspend++; + + if (zilog->zl_suspend > 1) { /* - * Someone else already began a suspend. + * Someone else is already suspending it. * Just wait for them to finish. */ + while (zilog->zl_suspending) cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); mutex_exit(&zilog->zl_lock); + + if (cookiep == NULL) + zil_resume(os); + else + *cookiep = os; + return (0); + } + + /* + * If there is no pointer to an on-disk block, this ZIL must not + * be active (e.g. filesystem not mounted), so there's nothing + * to clean up. + */ + if (BP_IS_HOLE(&zh->zh_log)) { + ASSERT(cookiep != NULL); /* fast path already handled */ + + *cookiep = os; + mutex_exit(&zilog->zl_lock); return (0); } + zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); @@ -1848,16 +1919,25 @@ zil_suspend(zilog_t *zilog) cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); + if (cookiep == NULL) + zil_resume(os); + else + *cookiep = os; return (0); } void -zil_resume(zilog_t *zilog) +zil_resume(void *cookie) { + objset_t *os = cookie; + zilog_t *zilog = dmu_objset_zil(os); + mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_suspend != 0); zilog->zl_suspend--; mutex_exit(&zilog->zl_lock); + dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); + dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); } typedef struct zil_replay_arg { @@ -1930,7 +2010,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { error = zil_read_log_data(zilog, (lr_write_t *)lr, zr->zr_lr + reclen); - if (error) + if (error != 0) return (zil_replay_error(zilog, lr, error)); } @@ -1951,7 +2031,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) * is updated if we are in replay mode. */ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); - if (error) { + if (error != 0) { /* * The DMU's dnode layer doesn't see removes until the txg * commits, so a subsequent claim can spuriously fail with @@ -1961,7 +2041,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) */ txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); - if (error) + if (error != 0) return (zil_replay_error(zilog, lr, error)); } return (0); @@ -2035,19 +2115,10 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx) int zil_vdev_offline(const char *osname, void *arg) { - objset_t *os; - zilog_t *zilog; int error; - error = dmu_objset_hold(osname, FTAG, &os); - if (error) - return (error); - - zilog = dmu_objset_zil(os); - if (zil_suspend(zilog) != 0) - error = EEXIST; - else - zil_resume(zilog); - dmu_objset_rele(os, FTAG); - return (error); + error = zil_suspend(osname, NULL); + if (error != 0) + return (SET_ERROR(EEXIST)); + return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 5e4d9a857..76c27cf46 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -372,7 +372,7 @@ zio_decompress(zio_t *zio, void *data, uint64_t size) if (zio->io_error == 0 && zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), zio->io_data, data, zio->io_size, size) != 0) - zio->io_error = EIO; + zio->io_error = SET_ERROR(EIO); } /* @@ -724,6 +724,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite) void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { + metaslab_check_free(spa, bp); bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); } @@ -740,6 +741,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(spa_syncing_txg(spa) == txg); ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); + metaslab_check_free(spa, bp); + zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); @@ -2030,8 +2033,8 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) if (arc_buf_size(abuf) != zio->io_orig_size || bcmp(abuf->b_data, zio->io_orig_data, zio->io_orig_size) != 0) - error = EEXIST; - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + error = SET_ERROR(EEXIST); + VERIFY(arc_buf_remove_ref(abuf, &abuf)); } ddt_enter(ddt); @@ -2492,7 +2495,7 @@ zio_vdev_io_start(zio_t *zio) return (ZIO_PIPELINE_STOP); if (!vdev_accessible(vd, zio)) { - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); zio_interrupt(zio); return (ZIO_PIPELINE_STOP); } @@ -2529,7 +2532,7 @@ zio_vdev_io_done(zio_t *zio) if (zio->io_error) { if (!vdev_accessible(vd, zio)) { - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); } else { unexpected_error = B_TRUE; } @@ -2614,15 +2617,16 @@ zio_vdev_io_assess(zio_t *zio) */ if (zio->io_error && vd != NULL && vd->vdev_ops->vdev_op_leaf && !vdev_accessible(vd, zio)) - zio->io_error = ENXIO; + zio->io_error = SET_ERROR(ENXIO); /* * If we can't write to an interior vdev (mirror or RAID-Z), * set vdev_cant_write so that we stop trying to allocate from it. */ if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE && - vd != NULL && !vd->vdev_ops->vdev_op_leaf) + vd != NULL && !vd->vdev_ops->vdev_op_leaf) { vd->vdev_cant_write = B_TRUE; + } if (zio->io_error) zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c index c8fe20f2e..bc7331764 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -201,7 +202,7 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) zio_cksum_t actual_cksum, expected_cksum, verifier; if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (ci->ci_eck) { zio_eck_t *eck; @@ -216,10 +217,10 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) nused = BSWAP_64(zilc->zc_nused); else - return (ECKSUM); + return (SET_ERROR(ECKSUM)); if (nused > size) - return (ECKSUM); + return (SET_ERROR(ECKSUM)); size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); } else { @@ -261,7 +262,7 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) info->zbc_has_cksum = 1; if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) - return (ECKSUM); + return (SET_ERROR(ECKSUM)); if (zio_injection_enabled && !zio->io_error && (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c index 1dc780d4b..d3c6ae17c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c @@ -27,6 +27,10 @@ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + #include #include #include @@ -130,7 +134,7 @@ zio_decompress_data(enum zio_compress c, void *src, void *dst, zio_compress_info_t *ci = &zio_compress_table[c]; if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL) - return (EINVAL); + return (SET_ERROR(EINVAL)); return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c index a9d4ab407..9f3db704b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* @@ -276,7 +276,7 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) break; } if (handler->zi_record.zi_error == ENXIO) { - ret = EIO; + ret = SET_ERROR(EIO); break; } } @@ -416,7 +416,7 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) * still allowing it to be unloaded. */ if ((spa = spa_inject_addref(name)) == NULL) - return (ENOENT); + return (SET_ERROR(ENOENT)); handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); @@ -468,7 +468,7 @@ zio_inject_list_next(int *id, char *name, size_t buflen, (void) strncpy(name, spa_name(handler->zi_spa), buflen); ret = 0; } else { - ret = ENOENT; + ret = SET_ERROR(ENOENT); } rw_exit(&inject_lock); @@ -495,7 +495,7 @@ zio_clear_fault(int id) if (handler == NULL) { rw_exit(&inject_lock); - return (ENOENT); + return (SET_ERROR(ENOENT)); } list_remove(&inject_handlers, handler); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c index 48557a926..8fe0b7c0b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c @@ -23,6 +23,7 @@ * * Copyright (c) 2006-2010 Pawel Jakub Dawidek * All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -144,7 +145,7 @@ typedef struct zvol_state { int zvol_maxphys = DMU_MAX_ACCESS/2; extern int zfs_set_prop_nvlist(const char *, zprop_source_t, - nvlist_t *, nvlist_t **); + nvlist_t *, nvlist_t *); static int zvol_remove_zv(zvol_state_t *); static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); static int zvol_dumpify(zvol_state_t *zv); @@ -194,14 +195,14 @@ int zvol_check_volsize(uint64_t volsize, uint64_t blocksize) { if (volsize == 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); if (volsize % blocksize != 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); #ifdef _ILP32 if (volsize - 1 > SPEC_MAXOFFSET_T) - return (EOVERFLOW); + return (SET_ERROR(EOVERFLOW)); #endif return (0); } @@ -212,7 +213,7 @@ zvol_check_volblocksize(uint64_t volblocksize) if (volblocksize < SPA_MINBLOCKSIZE || volblocksize > SPA_MAXBLOCKSIZE || !ISP2(volblocksize)) - return (EDOM); + return (SET_ERROR(EDOM)); return (0); } @@ -288,7 +289,7 @@ zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, /* Abort immediately if we have encountered gang blocks */ if (BP_IS_GANG(bp)) - return (EFRAGS); + return (SET_ERROR(EFRAGS)); /* * See if the block is at the end of the previous extent. @@ -427,7 +428,7 @@ zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) static int zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) { - return (ENOTSUP); + return (SET_ERROR(ENOTSUP)); } /* @@ -491,7 +492,7 @@ zvol_create_minor(const char *name) if (zvol_minor_lookup(name) != NULL) { mutex_exit(&spa_namespace_lock); - return (EEXIST); + return (SET_ERROR(EEXIST)); } /* lie and say we're read-only */ @@ -506,13 +507,13 @@ zvol_create_minor(const char *name) if ((minor = zfsdev_minor_alloc()) == 0) { dmu_objset_disown(os, FTAG); mutex_exit(&spa_namespace_lock); - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { dmu_objset_disown(os, FTAG); mutex_exit(&spa_namespace_lock); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, (char *)name); @@ -524,7 +525,7 @@ zvol_create_minor(const char *name) ddi_soft_state_free(zfsdev_state, minor); dmu_objset_disown(os, FTAG); mutex_exit(&spa_namespace_lock); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); @@ -535,7 +536,7 @@ zvol_create_minor(const char *name) ddi_soft_state_free(zfsdev_state, minor); dmu_objset_disown(os, FTAG); mutex_exit(&spa_namespace_lock); - return (EAGAIN); + return (SET_ERROR(EAGAIN)); } zs = ddi_get_soft_state(zfsdev_state, minor); @@ -609,7 +610,7 @@ zvol_remove_zv(zvol_state_t *zv) ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (zv->zv_total_opens != 0) - return (EBUSY); + return (SET_ERROR(EBUSY)); ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); @@ -636,7 +637,7 @@ zvol_remove_minor(const char *name) mutex_enter(&spa_namespace_lock); if ((zv = zvol_minor_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); - return (ENXIO); + return (SET_ERROR(ENXIO)); } g_topology_lock(); rc = zvol_remove_zv(zv); @@ -700,7 +701,7 @@ zvol_last_close(zvol_state_t *zv) if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && !(zv->zv_flags & ZVOL_RDONLY)) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); - (void) dmu_objset_evict_dbufs(zv->zv_objset); + dmu_objset_evict_dbufs(zv->zv_objset); dmu_objset_disown(zv->zv_objset, zvol_tag); zv->zv_objset = NULL; @@ -719,7 +720,7 @@ zvol_prealloc(zvol_state_t *zv) /* Check the space usage before attempting to allocate the space */ dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); if (avail < zv->zv_volsize) - return (ENOSPC); + return (SET_ERROR(ENOSPC)); /* Free old extents if they exist */ zvol_free_extents(zv); @@ -747,7 +748,7 @@ zvol_prealloc(zvol_state_t *zv) } #endif /* sun */ -int +static int zvol_update_volsize(objset_t *os, uint64_t volsize) { dmu_tx_t *tx; @@ -919,7 +920,7 @@ zvol_open(struct g_provider *pp, int flag, int count) if (zv == NULL) { if (locked) mutex_exit(&spa_namespace_lock); - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (zv->zv_total_opens == 0) @@ -930,17 +931,17 @@ zvol_open(struct g_provider *pp, int flag, int count) return (err); } if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { - err = EROFS; + err = SET_ERROR(EROFS); goto out; } if (zv->zv_flags & ZVOL_EXCL) { - err = EBUSY; + err = SET_ERROR(EBUSY); goto out; } #ifdef FEXCL if (flag & FEXCL) { if (zv->zv_total_opens != 0) { - err = EBUSY; + err = SET_ERROR(EBUSY); goto out; } zv->zv_flags |= ZVOL_EXCL; @@ -978,7 +979,7 @@ zvol_close(struct g_provider *pp, int flag, int count) if (zv == NULL) { if (locked) mutex_exit(&spa_namespace_lock); - return (ENXIO); + return (SET_ERROR(ENXIO)); } if (zv->zv_flags & ZVOL_EXCL) { @@ -1187,9 +1188,9 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, return (numerrors < vd->vdev_children ? 0 : EIO); if (doread && !vdev_readable(vd)) - return (EIO); + return (SET_ERROR(EIO)); else if (!doread && !vdev_writeable(vd)) - return (EIO); + return (SET_ERROR(EIO)); dvd = vd->vdev_tsd; ASSERT3P(dvd, !=, NULL); @@ -1198,7 +1199,7 @@ zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, if (ddi_in_panic() || isdump) { ASSERT(!doread); if (doread) - return (EIO); + return (SET_ERROR(EIO)); return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), lbtodb(size))); } else { @@ -1219,7 +1220,7 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, /* Must be sector aligned, and not stradle a block boundary. */ if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || P2BOUNDARY(offset, size, zv->zv_volblocksize)) { - return (EINVAL); + return (SET_ERROR(EINVAL)); } ASSERT(size <= zv->zv_volblocksize); @@ -1230,6 +1231,9 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, ze = list_next(&zv->zv_extents, ze); } + if (ze == NULL) + return (SET_ERROR(EINVAL)); + if (!ddi_in_panic()) spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); @@ -1310,7 +1314,7 @@ zvol_strategy(struct bio *bp) if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) - error = EIO; + error = SET_ERROR(EIO); break; } off += size; @@ -1358,7 +1362,10 @@ zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) - return (ENXIO); + return (SET_ERROR(ENXIO)); + + if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0) + return (SET_ERROR(EINVAL)); boff = ldbtob(blkno); resid = ldbtob(nblocks); @@ -1390,12 +1397,12 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr) zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) - return (ENXIO); + return (SET_ERROR(ENXIO)); volsize = zv->zv_volsize; if (uio->uio_resid > 0 && (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) - return (EIO); + return (SET_ERROR(EIO)); if (zv->zv_flags & ZVOL_DUMPIFIED) { error = physio(zvol_strategy, NULL, dev, B_READ, @@ -1416,7 +1423,7 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr) if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) - error = EIO; + error = SET_ERROR(EIO); break; } } @@ -1437,12 +1444,12 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr) zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) - return (ENXIO); + return (SET_ERROR(ENXIO)); volsize = zv->zv_volsize; if (uio->uio_resid > 0 && (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) - return (EIO); + return (SET_ERROR(EIO)); if (zv->zv_flags & ZVOL_DUMPIFIED) { error = physio(zvol_strategy, NULL, dev, B_WRITE, @@ -1494,7 +1501,7 @@ zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) char *ptr; if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) - return (EFAULT); + return (SET_ERROR(EFAULT)); ptr = (char *)(uintptr_t)efi.dki_data_64; length = efi.dki_length; /* @@ -1505,7 +1512,7 @@ zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) * PMBR. */ if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) - return (EINVAL); + return (SET_ERROR(EINVAL)); gpe.efi_gpe_StartingLBA = LE_64(34ULL); gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); @@ -1530,13 +1537,13 @@ zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), flag)) - return (EFAULT); + return (SET_ERROR(EFAULT)); ptr += sizeof (gpt); length -= sizeof (gpt); } if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), length), flag)) - return (EFAULT); + return (SET_ERROR(EFAULT)); return (0); } @@ -1556,9 +1563,9 @@ zvol_get_volume_params(minor_t minor, uint64_t *blksize, zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); if (zv == NULL) - return (ENXIO); + return (SET_ERROR(ENXIO)); if (zv->zv_flags & ZVOL_DUMPIFIED) - return (ENXIO); + return (SET_ERROR(ENXIO)); ASSERT(blksize && max_xfer_len && minor_hdl && objset_hdl && zil_hdl && rl_hdl && bonus_hdl); @@ -1632,7 +1639,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) if (zv == NULL) { mutex_exit(&spa_namespace_lock); - return (ENXIO); + return (SET_ERROR(ENXIO)); } ASSERT(zv->zv_total_opens > 0); @@ -1647,7 +1654,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); mutex_exit(&spa_namespace_lock); if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) - error = EFAULT; + error = SET_ERROR(EFAULT); return (error); case DKIOCGMEDIAINFO: @@ -1657,7 +1664,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) dkm.dki_media_type = DK_UNKNOWN; mutex_exit(&spa_namespace_lock); if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) - error = EFAULT; + error = SET_ERROR(EFAULT); return (error); case DKIOCGETEFI: @@ -1685,7 +1692,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; if (ddi_copyout(&wce, (void *)arg, sizeof (int), flag)) - error = EFAULT; + error = SET_ERROR(EFAULT); break; } case DKIOCSETWCE: @@ -1693,7 +1700,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) int wce; if (ddi_copyin((void *)arg, &wce, sizeof (int), flag)) { - error = EFAULT; + error = SET_ERROR(EFAULT); break; } if (wce) { @@ -1713,7 +1720,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) * commands using these (like prtvtoc) expect ENOTSUP * since we're emulating an EFI label */ - error = ENOTSUP; + error = SET_ERROR(ENOTSUP); break; case DKIOCDUMPINIT: @@ -1732,8 +1739,67 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) zfs_range_unlock(rl); break; + case DKIOCFREE: + { + dkioc_free_t df; + dmu_tx_t *tx; + + if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { + error = SET_ERROR(EFAULT); + break; + } + + /* + * Apply Postel's Law to length-checking. If they overshoot, + * just blank out until the end, if there's a need to blank + * out anything. + */ + if (df.df_start >= zv->zv_volsize) + break; /* No need to do anything... */ + if (df.df_start + df.df_length > zv->zv_volsize) + df.df_length = DMU_OBJECT_END; + + rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, + RL_WRITER); + tx = dmu_tx_create(zv->zv_objset); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error != 0) { + dmu_tx_abort(tx); + } else { + zvol_log_truncate(zv, tx, df.df_start, + df.df_length, B_TRUE); + dmu_tx_commit(tx); + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, + df.df_start, df.df_length); + } + + zfs_range_unlock(rl); + + if (error == 0) { + /* + * If the write-cache is disabled or 'sync' property + * is set to 'always' then treat this as a synchronous + * operation (i.e. commit to zil). + */ + if (!(zv->zv_flags & ZVOL_WCE) || + (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + + /* + * If the caller really wants synchronous writes, and + * can't wait for them, don't return until the write + * is done. + */ + if (df.df_flags & DF_WAIT_SYNC) { + txg_wait_synced( + dmu_objset_pool(zv->zv_objset), 0); + } + } + break; + } + default: - error = ENOTTY; + error = SET_ERROR(ENOTTY); break; } @@ -1880,11 +1946,11 @@ zvol_dumpify(zvol_state_t *zv) objset_t *os = zv->zv_objset; if (zv->zv_flags & ZVOL_RDONLY) - return (EROFS); + return (SET_ERROR(EROFS)); if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { - boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; + boolean_t resize = (dumpsize > 0); if ((error = zvol_dump_init(zv, resize)) != 0) { (void) zvol_dump_fini(zv); @@ -2183,8 +2249,10 @@ zvol_create_snapshots(objset_t *os, const char *name) cookie = obj = 0; sname = kmem_alloc(MAXPATHLEN, KM_SLEEP); +#if 0 (void) dmu_objset_find(name, dmu_objset_prefetch, NULL, DS_FIND_SNAPSHOTS); +#endif for (;;) { len = snprintf(sname, MAXPATHLEN, "%s@", name); @@ -2194,8 +2262,10 @@ zvol_create_snapshots(objset_t *os, const char *name) break; } + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); error = dmu_snapshot_list_next(os, MAXPATHLEN - len, sname + len, &obj, &cookie, NULL); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error != 0) { if (error == ENOENT) error = 0; @@ -2230,13 +2300,16 @@ zvol_create_minors(const char *name) return (error); } if (dmu_objset_type(os) == DMU_OST_ZVOL) { + dsl_dataset_long_hold(os->os_dsl_dataset, FTAG); + dsl_pool_rele(dmu_objset_pool(os), FTAG); if ((error = zvol_create_minor(name)) == 0) error = zvol_create_snapshots(os, name); else { printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", name, error); } - dmu_objset_rele(os, FTAG); + dsl_dataset_long_rele(os->os_dsl_dataset, FTAG); + dsl_dataset_rele(os->os_dsl_dataset, FTAG); return (error); } if (dmu_objset_type(os) != DMU_OST_ZFS) { @@ -2253,12 +2326,14 @@ zvol_create_minors(const char *name) p = osname + strlen(osname); len = MAXPATHLEN - (p - osname); +#if 0 /* Prefetch the datasets. */ cookie = 0; while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { if (!dataset_name_hidden(osname)) (void) dmu_objset_prefetch(osname, NULL); } +#endif cookie = 0; while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h b/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h index bb79cb839..11f4ec7c3 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/feature_tests.h @@ -27,8 +27,6 @@ #ifndef _SYS_FEATURE_TESTS_H #define _SYS_FEATURE_TESTS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #ifdef __cplusplus @@ -366,7 +364,7 @@ extern "C" { * compiler is used. This allows for the use of single prototype * declarations regardless of compiler version. */ -#if (defined(__STDC__) && defined(_STDC_C99)) +#if (defined(__STDC__) && defined(_STDC_C99)) && !defined(__cplusplus) #define _RESTRICT_KYWD restrict #else #define _RESTRICT_KYWD diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h index 38deab636..51c75068a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -55,6 +55,16 @@ typedef enum { ZFS_TYPE_POOL = 0x8 } zfs_type_t; +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) @@ -750,75 +760,79 @@ typedef struct ddt_histogram { /* * /dev/zfs ioctl numbers. */ -typedef unsigned long zfs_ioc_t; - -#define ZFS_IOC(ioreq) ((ioreq) & 0xff) - -#define ZFS_IOC_POOL_CREATE _IOWR('Z', 0, struct zfs_cmd) -#define ZFS_IOC_POOL_DESTROY _IOWR('Z', 1, struct zfs_cmd) -#define ZFS_IOC_POOL_IMPORT _IOWR('Z', 2, struct zfs_cmd) -#define ZFS_IOC_POOL_EXPORT _IOWR('Z', 3, struct zfs_cmd) -#define ZFS_IOC_POOL_CONFIGS _IOWR('Z', 4, struct zfs_cmd) -#define ZFS_IOC_POOL_STATS _IOWR('Z', 5, struct zfs_cmd) -#define ZFS_IOC_POOL_TRYIMPORT _IOWR('Z', 6, struct zfs_cmd) -#define ZFS_IOC_POOL_SCAN _IOWR('Z', 7, struct zfs_cmd) -#define ZFS_IOC_POOL_FREEZE _IOWR('Z', 8, struct zfs_cmd) -#define ZFS_IOC_POOL_UPGRADE _IOWR('Z', 9, struct zfs_cmd) -#define ZFS_IOC_POOL_GET_HISTORY _IOWR('Z', 10, struct zfs_cmd) -#define ZFS_IOC_VDEV_ADD _IOWR('Z', 11, struct zfs_cmd) -#define ZFS_IOC_VDEV_REMOVE _IOWR('Z', 12, struct zfs_cmd) -#define ZFS_IOC_VDEV_SET_STATE _IOWR('Z', 13, struct zfs_cmd) -#define ZFS_IOC_VDEV_ATTACH _IOWR('Z', 14, struct zfs_cmd) -#define ZFS_IOC_VDEV_DETACH _IOWR('Z', 15, struct zfs_cmd) -#define ZFS_IOC_VDEV_SETPATH _IOWR('Z', 16, struct zfs_cmd) -#define ZFS_IOC_VDEV_SETFRU _IOWR('Z', 17, struct zfs_cmd) -#define ZFS_IOC_OBJSET_STATS _IOWR('Z', 18, struct zfs_cmd) -#define ZFS_IOC_OBJSET_ZPLPROPS _IOWR('Z', 19, struct zfs_cmd) -#define ZFS_IOC_DATASET_LIST_NEXT _IOWR('Z', 20, struct zfs_cmd) -#define ZFS_IOC_SNAPSHOT_LIST_NEXT _IOWR('Z', 21, struct zfs_cmd) -#define ZFS_IOC_SET_PROP _IOWR('Z', 22, struct zfs_cmd) -#define ZFS_IOC_CREATE _IOWR('Z', 23, struct zfs_cmd) -#define ZFS_IOC_DESTROY _IOWR('Z', 24, struct zfs_cmd) -#define ZFS_IOC_ROLLBACK _IOWR('Z', 25, struct zfs_cmd) -#define ZFS_IOC_RENAME _IOWR('Z', 26, struct zfs_cmd) -#define ZFS_IOC_RECV _IOWR('Z', 27, struct zfs_cmd) -#define ZFS_IOC_SEND _IOWR('Z', 28, struct zfs_cmd) -#define ZFS_IOC_INJECT_FAULT _IOWR('Z', 29, struct zfs_cmd) -#define ZFS_IOC_CLEAR_FAULT _IOWR('Z', 30, struct zfs_cmd) -#define ZFS_IOC_INJECT_LIST_NEXT _IOWR('Z', 31, struct zfs_cmd) -#define ZFS_IOC_ERROR_LOG _IOWR('Z', 32, struct zfs_cmd) -#define ZFS_IOC_CLEAR _IOWR('Z', 33, struct zfs_cmd) -#define ZFS_IOC_PROMOTE _IOWR('Z', 34, struct zfs_cmd) -#define ZFS_IOC_DESTROY_SNAPS_NVL _IOWR('Z', 35, struct zfs_cmd) -#define ZFS_IOC_SNAPSHOT _IOWR('Z', 36, struct zfs_cmd) -#define ZFS_IOC_DSOBJ_TO_DSNAME _IOWR('Z', 37, struct zfs_cmd) -#define ZFS_IOC_OBJ_TO_PATH _IOWR('Z', 38, struct zfs_cmd) -#define ZFS_IOC_POOL_SET_PROPS _IOWR('Z', 39, struct zfs_cmd) -#define ZFS_IOC_POOL_GET_PROPS _IOWR('Z', 40, struct zfs_cmd) -#define ZFS_IOC_SET_FSACL _IOWR('Z', 41, struct zfs_cmd) -#define ZFS_IOC_GET_FSACL _IOWR('Z', 42, struct zfs_cmd) -#define ZFS_IOC_SHARE _IOWR('Z', 43, struct zfs_cmd) -#define ZFS_IOC_INHERIT_PROP _IOWR('Z', 44, struct zfs_cmd) -#define ZFS_IOC_SMB_ACL _IOWR('Z', 45, struct zfs_cmd) -#define ZFS_IOC_USERSPACE_ONE _IOWR('Z', 46, struct zfs_cmd) -#define ZFS_IOC_USERSPACE_MANY _IOWR('Z', 47, struct zfs_cmd) -#define ZFS_IOC_USERSPACE_UPGRADE _IOWR('Z', 48, struct zfs_cmd) -#define ZFS_IOC_HOLD _IOWR('Z', 49, struct zfs_cmd) -#define ZFS_IOC_RELEASE _IOWR('Z', 50, struct zfs_cmd) -#define ZFS_IOC_GET_HOLDS _IOWR('Z', 51, struct zfs_cmd) -#define ZFS_IOC_OBJSET_RECVD_PROPS _IOWR('Z', 52, struct zfs_cmd) -#define ZFS_IOC_VDEV_SPLIT _IOWR('Z', 53, struct zfs_cmd) -#define ZFS_IOC_NEXT_OBJ _IOWR('Z', 54, struct zfs_cmd) -#define ZFS_IOC_DIFF _IOWR('Z', 55, struct zfs_cmd) -#define ZFS_IOC_TMP_SNAPSHOT _IOWR('Z', 56, struct zfs_cmd) -#define ZFS_IOC_OBJ_TO_STATS _IOWR('Z', 57, struct zfs_cmd) -#define ZFS_IOC_JAIL _IOWR('Z', 58, struct zfs_cmd) -#define ZFS_IOC_UNJAIL _IOWR('Z', 59, struct zfs_cmd) -#define ZFS_IOC_POOL_REGUID _IOWR('Z', 60, struct zfs_cmd) -#define ZFS_IOC_SPACE_WRITTEN _IOWR('Z', 61, struct zfs_cmd) -#define ZFS_IOC_SPACE_SNAPS _IOWR('Z', 62, struct zfs_cmd) -#define ZFS_IOC_SEND_PROGRESS _IOWR('Z', 63, struct zfs_cmd) -#define ZFS_IOC_POOL_REOPEN _IOWR('Z', 64, struct zfs_cmd) +typedef enum zfs_ioc { + ZFS_IOC_FIRST = 0, + ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST, + ZFS_IOC_POOL_DESTROY, + ZFS_IOC_POOL_IMPORT, + ZFS_IOC_POOL_EXPORT, + ZFS_IOC_POOL_CONFIGS, + ZFS_IOC_POOL_STATS, + ZFS_IOC_POOL_TRYIMPORT, + ZFS_IOC_POOL_SCAN, + ZFS_IOC_POOL_FREEZE, + ZFS_IOC_POOL_UPGRADE, + ZFS_IOC_POOL_GET_HISTORY, + ZFS_IOC_VDEV_ADD, + ZFS_IOC_VDEV_REMOVE, + ZFS_IOC_VDEV_SET_STATE, + ZFS_IOC_VDEV_ATTACH, + ZFS_IOC_VDEV_DETACH, + ZFS_IOC_VDEV_SETPATH, + ZFS_IOC_VDEV_SETFRU, + ZFS_IOC_OBJSET_STATS, + ZFS_IOC_OBJSET_ZPLPROPS, + ZFS_IOC_DATASET_LIST_NEXT, + ZFS_IOC_SNAPSHOT_LIST_NEXT, + ZFS_IOC_SET_PROP, + ZFS_IOC_CREATE, + ZFS_IOC_DESTROY, + ZFS_IOC_ROLLBACK, + ZFS_IOC_RENAME, + ZFS_IOC_RECV, + ZFS_IOC_SEND, + ZFS_IOC_INJECT_FAULT, + ZFS_IOC_CLEAR_FAULT, + ZFS_IOC_INJECT_LIST_NEXT, + ZFS_IOC_ERROR_LOG, + ZFS_IOC_CLEAR, + ZFS_IOC_PROMOTE, + ZFS_IOC_DESTROY_SNAPS, + ZFS_IOC_SNAPSHOT, + ZFS_IOC_DSOBJ_TO_DSNAME, + ZFS_IOC_OBJ_TO_PATH, + ZFS_IOC_POOL_SET_PROPS, + ZFS_IOC_POOL_GET_PROPS, + ZFS_IOC_SET_FSACL, + ZFS_IOC_GET_FSACL, + ZFS_IOC_SHARE, + ZFS_IOC_INHERIT_PROP, + ZFS_IOC_SMB_ACL, + ZFS_IOC_USERSPACE_ONE, + ZFS_IOC_USERSPACE_MANY, + ZFS_IOC_USERSPACE_UPGRADE, + ZFS_IOC_HOLD, + ZFS_IOC_RELEASE, + ZFS_IOC_GET_HOLDS, + ZFS_IOC_OBJSET_RECVD_PROPS, + ZFS_IOC_VDEV_SPLIT, + ZFS_IOC_NEXT_OBJ, + ZFS_IOC_DIFF, + ZFS_IOC_TMP_SNAPSHOT, + ZFS_IOC_OBJ_TO_STATS, + ZFS_IOC_JAIL, + ZFS_IOC_UNJAIL, + ZFS_IOC_POOL_REGUID, + ZFS_IOC_SPACE_WRITTEN, + ZFS_IOC_SPACE_SNAPS, + ZFS_IOC_SEND_PROGRESS, + ZFS_IOC_POOL_REOPEN, + ZFS_IOC_LOG_HISTORY, + ZFS_IOC_SEND_NEW, + ZFS_IOC_SEND_SPACE, + ZFS_IOC_CLONE, + ZFS_IOC_LAST +} zfs_ioc_t; /* * Internal SPA load state. Used by FMA diagnosis engine. @@ -854,6 +868,12 @@ typedef enum { #define ZPOOL_HIST_TXG "history txg" #define ZPOOL_HIST_INT_EVENT "history internal event" #define ZPOOL_HIST_INT_STR "history internal str" +#define ZPOOL_HIST_INT_NAME "internal_name" +#define ZPOOL_HIST_IOCTL "ioctl" +#define ZPOOL_HIST_INPUT_NVL "in_nvl" +#define ZPOOL_HIST_OUTPUT_NVL "out_nvl" +#define ZPOOL_HIST_DSNAME "dsname" +#define ZPOOL_HIST_DSID "dsid" /* * Flags for ZFS_IOC_VDEV_SET_STATE @@ -899,56 +919,6 @@ typedef enum { #define ZFS_EV_VDEV_PATH "vdev_path" #define ZFS_EV_VDEV_GUID "vdev_guid" -/* - * Note: This is encoded on-disk, so new events must be added to the - * end, and unused events can not be removed. Be sure to edit - * libzfs_pool.c: hist_event_table[]. - */ -typedef enum history_internal_events { - LOG_NO_EVENT = 0, - LOG_POOL_CREATE, - LOG_POOL_VDEV_ADD, - LOG_POOL_REMOVE, - LOG_POOL_DESTROY, - LOG_POOL_EXPORT, - LOG_POOL_IMPORT, - LOG_POOL_VDEV_ATTACH, - LOG_POOL_VDEV_REPLACE, - LOG_POOL_VDEV_DETACH, - LOG_POOL_VDEV_ONLINE, - LOG_POOL_VDEV_OFFLINE, - LOG_POOL_UPGRADE, - LOG_POOL_CLEAR, - LOG_POOL_SCAN, - LOG_POOL_PROPSET, - LOG_DS_CREATE, - LOG_DS_CLONE, - LOG_DS_DESTROY, - LOG_DS_DESTROY_BEGIN, - LOG_DS_INHERIT, - LOG_DS_PROPSET, - LOG_DS_QUOTA, - LOG_DS_PERM_UPDATE, - LOG_DS_PERM_REMOVE, - LOG_DS_PERM_WHO_REMOVE, - LOG_DS_PROMOTE, - LOG_DS_RECEIVE, - LOG_DS_RENAME, - LOG_DS_RESERVATION, - LOG_DS_REPLAY_INC_SYNC, - LOG_DS_REPLAY_FULL_SYNC, - LOG_DS_ROLLBACK, - LOG_DS_SNAPSHOT, - LOG_DS_UPGRADE, - LOG_DS_REFQUOTA, - LOG_DS_REFRESERV, - LOG_POOL_SCAN_DONE, - LOG_DS_USER_HOLD, - LOG_DS_USER_RELEASE, - LOG_POOL_SPLIT, - LOG_END -} history_internal_events_t; - #ifdef __cplusplus } #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h b/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h index 3062dd95a..e4545a96e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/nvpair.h @@ -283,6 +283,7 @@ void fnvlist_pack_free(char *, size_t); nvlist_t *fnvlist_unpack(char *, size_t); nvlist_t *fnvlist_dup(nvlist_t *); void fnvlist_merge(nvlist_t *, nvlist_t *); +size_t fnvlist_num_pairs(nvlist_t *); void fnvlist_add_boolean(nvlist_t *, const char *); void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); -- 2.45.0