4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
28 #ifndef _SYS_DSL_DATASET_H
29 #define _SYS_DSL_DATASET_H
35 #include <sys/bplist.h>
36 #include <sys/dsl_synctask.h>
37 #include <sys/zfs_context.h>
38 #include <sys/dsl_deadlist.h>
39 #include <sys/zfs_refcount.h>
40 #include <sys/rrwlock.h>
41 #include <sys/dsl_crypt.h>
42 #include <zfeature_common.h>
51 struct dsl_crypto_params;
52 struct dsl_key_mapping;
53 struct zfs_bookmark_phys;
/*
 * DS_FLAG_INCONSISTENT is bit 0 of the DS_FLAG_* bits kept in
 * dsl_dataset_phys_t.ds_flags.
 */
#define DS_FLAG_INCONSISTENT (1ULL<<0)

/*
 * Evaluates to 1 if DS_FLAG_INCONSISTENT is set in the on-disk ds_flags of
 * 'ds' (a dsl_dataset_t *), and to 0 otherwise.  The masked word is compared
 * against zero so that the result is an int-valued 0/1, the same shape that
 * DS_UNIQUE_IS_ACCURATE() produces, rather than a raw 64-bit flags word.
 */
#define DS_IS_INCONSISTENT(ds) \
	((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) != 0)
60 * Do not allow this dataset to be promoted.
62 #define DS_FLAG_NOPROMOTE (1ULL<<1)
65 * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
66 * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
67 * refquota/refreservations).
69 #define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
72 * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called
73 * on a dataset. This allows the dataset to be destroyed using 'zfs release'.
#define DS_FLAG_DEFER_DESTROY (1ULL<<3)

/*
 * Evaluates to 1 if DS_FLAG_DEFER_DESTROY is set in the on-disk ds_flags of
 * 'ds' (a dsl_dataset_t *), and to 0 otherwise.
 *
 * The flag is bit 3, so the bare mask expression would evaluate to 8 when
 * the flag is set.  Comparing against zero normalizes the result so it can
 * be compared with 1 or stored in a boolean_t without carrying a value that
 * is neither false nor true.
 */
#define DS_IS_DEFER_DESTROY(ds) \
	((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_DEFER_DESTROY) != 0)
80 * DS_FIELD_* are strings that are used in the "extensified" dataset zap object.
81 * They should be of the format <reverse-dns>:<field>.
85 * This field's value is the object ID of a zap object which contains the
86 * bookmarks of this dataset. If it is present, then this dataset is counted
87 * in the refcount of the SPA_FEATURE_BOOKMARKS feature.
89 #define DS_FIELD_BOOKMARK_NAMES "com.delphix:bookmarks"
92 * This field is present (with value=0) if this dataset may contain large
93 * dnodes (>512B). If it is present, then this dataset is counted in the
94 * refcount of the SPA_FEATURE_LARGE_DNODE feature.
96 #define DS_FIELD_LARGE_DNODE "org.zfsonlinux:large_dnode"
99 * These fields are set on datasets that are in the middle of a resumable
100 * receive, and allow the sender to resume the send if it is interrupted.
102 #define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid"
103 #define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname"
104 #define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid"
105 #define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object"
106 #define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset"
107 #define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes"
108 #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
109 #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
110 #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
111 #define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
114 * This field is set to the object number of the remap deadlist if one exists.
116 #define DS_FIELD_REMAP_DEADLIST "com.delphix:remap_deadlist"
119 * Set when receiving an incremental from a redaction bookmark; the value
120 * is the list of guids of that bookmark's snapshots.
122 #define DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS \
123 "com.delphix:resume_redact_book_snaps"
126 * This field is set to the ivset guid for encrypted snapshots. This is used
127 * for validating raw receives.
129 #define DS_FIELD_IVSET_GUID "com.datto:ivset_guid"
132 * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
133 * name lookups should be performed case-insensitively.
135 #define DS_FLAG_CI_DATASET (1ULL<<16)
137 #define DS_CREATE_FLAG_NODIRTY (1ULL<<24)
139 typedef struct dsl_dataset_phys {
140 uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
141 uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
142 uint64_t ds_prev_snap_txg;
143 uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */
144 uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
145 uint64_t ds_num_children; /* clone/snap children; ==0 for head */
146 uint64_t ds_creation_time; /* seconds since 1970 */
147 uint64_t ds_creation_txg;
148 uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
150 * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
151 * include all blocks referenced by this dataset, including those
152 * shared with any other datasets.
154 uint64_t ds_referenced_bytes;
155 uint64_t ds_compressed_bytes;
156 uint64_t ds_uncompressed_bytes;
157 uint64_t ds_unique_bytes; /* only relevant to snapshots */
159 * The ds_fsid_guid is a 56-bit ID that can change to avoid
160 * collisions. The ds_guid is a 64-bit ID that will never
161 * change, so there is a small probability that it will collide.
163 uint64_t ds_fsid_guid;
165 uint64_t ds_flags; /* DS_FLAG_* */
167 uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
168 uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
169 uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */
170 uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */
171 } dsl_dataset_phys_t;
173 typedef struct dsl_dataset {
174 dmu_buf_user_t ds_dbu;
175 rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */
178 struct dsl_dir *ds_dir;
181 uint64_t ds_fsid_guid;
182 boolean_t ds_is_snapshot;
183 struct dsl_key_mapping *ds_key_mapping;
185 /* only used in syncing context, only valid for non-snapshots: */
186 struct dsl_dataset *ds_prev;
187 uint64_t ds_bookmarks_obj; /* DMU_OTN_ZAP_METADATA */
188 avl_tree_t ds_bookmarks; /* dsl_bookmark_node_t */
190 /* has internal locking: */
191 dsl_deadlist_t ds_deadlist;
192 bplist_t ds_pending_deadlist;
195 * The remap deadlist contains blocks (DVA's, really) that are
196 * referenced by the previous snapshot and point to indirect vdevs,
197 * but in this dataset they have been remapped to point to concrete
198 * (or at least, less-indirect) vdevs. In other words, the
199 * physical DVA is referenced by the previous snapshot but not by
200 * this dataset. Logically, the DVA continues to be referenced,
201 * but we are using a different (less indirect) physical DVA.
202 * This deadlist is used to determine when physical DVAs that
203 * point to indirect vdevs are no longer referenced anywhere,
204 * and thus should be marked obsolete.
206 * This is only used if SPA_FEATURE_OBSOLETE_COUNTS is enabled.
208 dsl_deadlist_t ds_remap_deadlist;
209 /* protects creation of the ds_remap_deadlist */
210 kmutex_t ds_remap_deadlist_lock;
212 /* protected by lock on pool's dp_dirty_datasets list */
213 txg_node_t ds_dirty_link;
214 list_node_t ds_synced_link;
217 * ds_phys->ds_<accounting> is also protected by ds_lock.
218 * Protected by ds_lock:
222 uint64_t ds_userrefs;
226 * Long holds prevent the ds from being destroyed; they allow the
227 * ds to remain held even after dropping the dp_config_rwlock.
228 * Owning counts as a long hold. See the comments above
229 * dsl_pool_hold() for details.
231 zfs_refcount_t ds_longholds;
233 /* no locking; only for making guesses */
234 uint64_t ds_trysnap_txg;
236 /* for objset_open() */
237 kmutex_t ds_opening_lock;
239 uint64_t ds_reserved; /* cached refreservation */
240 uint64_t ds_quota; /* cached refquota */
242 kmutex_t ds_sendstream_lock;
243 list_t ds_sendstreams;
246 * When in the middle of a resumable receive, tracks how much
247 * progress we have made.
249 uint64_t ds_resume_object[TXG_SIZE];
250 uint64_t ds_resume_offset[TXG_SIZE];
251 uint64_t ds_resume_bytes[TXG_SIZE];
253 /* Protected by our dsl_dir's dd_lock */
257 * For ZFEATURE_FLAG_PER_DATASET features, set if this dataset
260 void *ds_feature[SPA_FEATURES];
263 * Set if we need to activate the feature on this dataset this txg
264 * (used only in syncing context).
266 void *ds_feature_activation[SPA_FEATURES];
268 /* Protected by ds_lock; keep at end of struct for better locality */
269 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
272 static inline dsl_dataset_phys_t *
273 dsl_dataset_phys(dsl_dataset_t *ds)
275 return ((dsl_dataset_phys_t *)ds->ds_dbuf->db_data);
278 typedef struct dsl_dataset_promote_arg {
279 const char *ddpa_clonename;
280 dsl_dataset_t *ddpa_clone;
281 list_t shared_snaps, origin_snaps, clone_snaps;
282 dsl_dataset_t *origin_origin; /* origin of the origin */
283 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
287 } dsl_dataset_promote_arg_t;
289 typedef struct dsl_dataset_rollback_arg {
290 const char *ddra_fsname;
291 const char *ddra_tosnap;
293 nvlist_t *ddra_result;
294 } dsl_dataset_rollback_arg_t;
296 typedef struct dsl_dataset_snapshot_arg {
297 nvlist_t *ddsa_snaps;
298 nvlist_t *ddsa_props;
299 nvlist_t *ddsa_errors;
302 } dsl_dataset_snapshot_arg_t;
305 * The max length of a temporary tag prefix is the number of hex digits
306 * required to express UINT64_MAX plus one for the hyphen.
308 #define MAX_TAG_PREFIX_LEN 17
/*
 * Nonzero when the on-disk ds_num_children count of 'ds' is not zero.
 * ds_num_children is an unsigned 64-bit field, so "greater than zero" and
 * "not equal to zero" are the same test.
 */
#define dsl_dataset_is_snapshot(ds) \
	(dsl_dataset_phys(ds)->ds_num_children > 0)

/*
 * Evaluates to 1 when DS_FLAG_UNIQUE_ACCURATE is set in the on-disk
 * ds_flags of 'ds', and to 0 when it is clear.
 */
#define DS_UNIQUE_IS_ACCURATE(ds) \
	(!!(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE))
316 /* flags for holding the dataset */
317 typedef enum ds_hold_flags {
318 DS_HOLD_FLAG_NONE = 0 << 0,
319 DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */
322 int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
323 dsl_dataset_t **dsp);
324 int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
325 ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
326 boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
328 int dsl_dataset_create_key_mapping(dsl_dataset_t *ds);
329 int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
330 ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
331 void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds);
332 int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
333 void *tag, dsl_dataset_t **);
334 void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
336 void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
337 int dsl_dataset_own(struct dsl_pool *dp, const char *name,
338 ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
339 int dsl_dataset_own_force(struct dsl_pool *dp, const char *name,
340 ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
341 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
342 ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
343 int dsl_dataset_own_obj_force(struct dsl_pool *dp, uint64_t dsobj,
344 ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
345 void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
346 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
347 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override);
348 int dsl_dataset_namelen(dsl_dataset_t *ds);
349 boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
350 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
351 dsl_dataset_t *origin, uint64_t flags, cred_t *,
352 struct dsl_crypto_params *, dmu_tx_t *);
353 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
354 struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
355 void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx);
356 int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx);
357 int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
358 void dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx);
359 int dsl_dataset_promote_check(void *arg, dmu_tx_t *tx);
360 int dsl_dataset_promote(const char *name, char *conflsnap);
361 int dsl_dataset_rename_snapshot(const char *fsname,
362 const char *oldsnapname, const char *newsnapname, boolean_t recursive);
363 int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
364 minor_t cleanup_minor, const char *htag);
366 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
368 spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
370 boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
371 dsl_dataset_t *snap);
373 void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx);
374 void dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx);
376 void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
378 int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
379 dmu_tx_t *tx, boolean_t async);
380 void dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev,
381 uint64_t offset, uint64_t size, uint64_t birth, dmu_tx_t *tx);
382 int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
385 void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
387 int get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val);
388 char *get_receive_resume_stats_impl(dsl_dataset_t *ds);
389 char *get_child_receive_stats(dsl_dataset_t *ds);
390 uint64_t dsl_get_refratio(dsl_dataset_t *ds);
391 uint64_t dsl_get_logicalreferenced(dsl_dataset_t *ds);
392 uint64_t dsl_get_compressratio(dsl_dataset_t *ds);
393 uint64_t dsl_get_used(dsl_dataset_t *ds);
394 uint64_t dsl_get_creation(dsl_dataset_t *ds);
395 uint64_t dsl_get_creationtxg(dsl_dataset_t *ds);
396 uint64_t dsl_get_refquota(dsl_dataset_t *ds);
397 uint64_t dsl_get_refreservation(dsl_dataset_t *ds);
398 uint64_t dsl_get_guid(dsl_dataset_t *ds);
399 uint64_t dsl_get_unique(dsl_dataset_t *ds);
400 uint64_t dsl_get_objsetid(dsl_dataset_t *ds);
401 uint64_t dsl_get_userrefs(dsl_dataset_t *ds);
402 uint64_t dsl_get_defer_destroy(dsl_dataset_t *ds);
403 uint64_t dsl_get_referenced(dsl_dataset_t *ds);
404 uint64_t dsl_get_numclones(dsl_dataset_t *ds);
405 uint64_t dsl_get_inconsistent(dsl_dataset_t *ds);
406 uint64_t dsl_get_redacted(dsl_dataset_t *ds);
407 uint64_t dsl_get_available(dsl_dataset_t *ds);
408 int dsl_get_written(dsl_dataset_t *ds, uint64_t *written);
409 int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap);
410 void dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval);
411 int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
414 void get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv);
415 void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
417 void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
418 void dsl_dataset_space(dsl_dataset_t *ds,
419 uint64_t *refdbytesp, uint64_t *availbytesp,
420 uint64_t *usedobjsp, uint64_t *availobjsp);
421 uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
422 int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *newds,
423 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
424 int dsl_dataset_space_written_bookmark(struct zfs_bookmark_phys *bmp,
425 dsl_dataset_t *newds, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
426 int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
427 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
429 int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
431 int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
432 uint64_t asize, uint64_t inflight, uint64_t *used,
434 int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
436 int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
437 uint64_t reservation);
438 int dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
439 uint64_t compression);
441 boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
442 uint64_t earlier_txg);
443 void dsl_dataset_long_hold(dsl_dataset_t *ds, const void *tag);
444 void dsl_dataset_long_rele(dsl_dataset_t *ds, const void *tag);
445 boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
447 int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
448 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx);
449 void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
450 dsl_dataset_t *origin_head, dmu_tx_t *tx);
451 int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
452 dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc);
453 void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
456 void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
458 void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
459 int dsl_dataset_get_snapname(dsl_dataset_t *ds);
460 int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
462 int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
464 void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
465 zprop_source_t source, uint64_t value, dmu_tx_t *tx);
466 void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx);
467 boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds);
468 boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
470 int dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx);
471 void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx);
472 int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
475 uint64_t dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds);
476 void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
477 boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds);
478 void dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
480 void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, void *arg,
482 void dsl_dataset_deactivate_feature(dsl_dataset_t *ds, spa_feature_t f,
484 boolean_t dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f);
485 boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds,
486 spa_feature_t f, uint64_t *outlength, uint64_t **outp);
488 void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
489 uint64_t num_redact_snaps, dmu_tx_t *tx);
492 #define dprintf_ds(ds, fmt, ...) do { \
493 if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
494 char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \
495 dsl_dataset_name(ds, __ds_name); \
496 dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
497 kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \
501 #define dprintf_ds(dd, fmt, ...)
508 #endif /* _SYS_DSL_DATASET_H */