/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>

typedef struct dmu_snapshots_destroy_arg {
        nvlist_t *dsda_snaps;
        nvlist_t *dsda_successful_snaps;
        boolean_t dsda_defer;
        nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;

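/*
 * Check whether a single snapshot can be destroyed.  Returns EINVAL if the
 * dataset is not a snapshot, EBUSY if it is long-held (or has user holds
 * and the destroy is not deferred), ENOTSUP if a deferred destroy is
 * requested on a pool that predates SPA_VERSION_USERREFS, and EEXIST if
 * the snapshot is a branch point (ds_num_children > 1).
 */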
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
        if (!ds->ds_is_snapshot)
                return (SET_ERROR(EINVAL));

        if (dsl_dataset_long_held(ds))
                return (SET_ERROR(EBUSY));

        /*
         * Only allow deferred destroy on pools that support it.
         * NOTE: deferred destroy is only supported on snapshots.
         */
        if (defer) {
                if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
                    SPA_VERSION_USERREFS)
                        return (SET_ERROR(ENOTSUP));
                return (0);
        }

        /*
         * If this snapshot has an elevated user reference count,
         * we can't destroy it yet.
         */
        if (ds->ds_userrefs > 0)
                return (SET_ERROR(EBUSY));

        /*
         * Can't delete a branch point.
         */
        if (dsl_dataset_phys(ds)->ds_num_children > 1)
                return (SET_ERROR(EEXIST));

        return (0);
}

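/*
 * Sync-task check function for destroying a list of snapshots.  Each
 * snapshot named in dsda_snaps is validated with
 * dsl_destroy_snapshot_check_impl(); missing snapshots are silently
 * skipped, passing snapshots are recorded in dsda_successful_snaps, and
 * failures are added to dsda_errlist, with the first recorded error
 * returned to the caller.
 */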
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
        dmu_snapshots_destroy_arg_t *dsda = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        nvpair_t *pair;
        int error = 0;

        if (!dmu_tx_is_syncing(tx))
                return (0);

        for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
            pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
                dsl_dataset_t *ds;

                error = dsl_dataset_hold(dp, nvpair_name(pair),
                    FTAG, &ds);

                /*
                 * If the snapshot does not exist, silently ignore it
                 * (it's "already destroyed").
                 */
                if (error == ENOENT)
                        continue;

                if (error == 0) {
                        error = dsl_destroy_snapshot_check_impl(ds,
                            dsda->dsda_defer);
                        dsl_dataset_rele(ds, FTAG);
                }

                if (error == 0) {
                        fnvlist_add_boolean(dsda->dsda_successful_snaps,
                            nvpair_name(pair));
                } else {
                        fnvlist_add_int32(dsda->dsda_errlist,
                            nvpair_name(pair), error);
                }
        }

        pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
        if (pair != NULL)
                return (fnvpair_value_int32(pair));

        return (0);
}

struct process_old_arg {
        dsl_dataset_t *ds;
        dsl_dataset_t *ds_prev;
        boolean_t after_branch_point;
        zio_t *pio;
        uint64_t used, comp, uncomp;
};

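/*
 * Callback invoked for each block pointer on ds_next's old-format deadlist.
 * Blocks born no later than ds's previous snapshot are still referenced, so
 * they are moved onto ds's deadlist (and credited back to ds_prev's unique
 * bytes when appropriate); blocks born after that point are unique to ds
 * and are freed, with their space tallied in the process_old_arg counters.
 */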
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
        struct process_old_arg *poa = arg;
        dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

        ASSERT(!BP_IS_HOLE(bp));

        if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
                dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
                if (poa->ds_prev && !poa->after_branch_point &&
                    bp->blk_birth >
                    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
                        dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
                            bp_get_dsize_sync(dp->dp_spa, bp);
                }
        } else {
                poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
                poa->comp += BP_GET_PSIZE(bp);
                poa->uncomp += BP_GET_UCSIZE(bp);
                dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
        }
        return (0);
}

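/*
 * Process ds_next's deadlist when it is in the old on-disk format
 * (dl_oldfmt): walk it with process_old_cb(), adjust the DD_USED_SNAP
 * accounting by the space that was freed, and swap deadlist objects so
 * that ds_next takes over the merged deadlist and the caller can free
 * ds's obsolete one.
 */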
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
        struct process_old_arg poa = { 0 };
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        objset_t *mos = dp->dp_meta_objset;
        uint64_t deadlist_obj;

        ASSERT(ds->ds_deadlist.dl_oldfmt);
        ASSERT(ds_next->ds_deadlist.dl_oldfmt);

        poa.ds = ds;
        poa.ds_prev = ds_prev;
        poa.after_branch_point = after_branch_point;
        poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
        VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
            process_old_cb, &poa, tx));
        VERIFY0(zio_wait(poa.pio));
        ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);

        /* change snapused */
        dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
            -poa.used, -poa.comp, -poa.uncomp, tx);

        /* swap next's deadlist to our deadlist */
        dsl_deadlist_close(&ds->ds_deadlist);
        dsl_deadlist_close(&ds_next->ds_deadlist);
        deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
        dsl_dataset_phys(ds)->ds_deadlist_obj =
            dsl_dataset_phys(ds_next)->ds_deadlist_obj;
        dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
        dsl_deadlist_open(&ds->ds_deadlist, mos,
            dsl_dataset_phys(ds)->ds_deadlist_obj);
        dsl_deadlist_open(&ds_next->ds_deadlist, mos,
            dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}

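/*
 * Remove the key 'mintxg' from the deadlist of every clone that branched
 * off of this dataset after that txg, recursing into the clones' own
 * clones, so that the adjacent deadlist ranges are collapsed together.
 */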
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
        objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
        zap_cursor_t zc;
        zap_attribute_t za;

        /*
         * If it is the old version, dd_clones doesn't exist so we can't
         * find the clones, but dsl_deadlist_remove_key() is a no-op so it
         * doesn't matter.
         */
        if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
                return;

        for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
            zap_cursor_retrieve(&zc, &za) == 0;
            zap_cursor_advance(&zc)) {
                dsl_dataset_t *clone;

                VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
                    za.za_first_integer, FTAG, &clone));
                if (clone->ds_dir->dd_origin_txg > mintxg) {
                        dsl_deadlist_remove_key(&clone->ds_deadlist,
                            mintxg, tx);
                        dsl_dataset_remove_clones_key(clone, mintxg, tx);
                }
                dsl_dataset_rele(clone, FTAG);
        }
        zap_cursor_fini(&zc);
}

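/*
 * Destroy one snapshot in syncing context.  If 'defer' is set and the
 * snapshot still has user holds or clones, it is merely marked with
 * DS_FLAG_DEFER_DESTROY.  Otherwise the snapshot is unlinked from the
 * snapshot chain and namespace, its deadlist is folded into the next
 * dataset's, the space accounting is adjusted, and its on-disk objects
 * are freed.
 */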
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
        int err;
        int after_branch_point = FALSE;
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        objset_t *mos = dp->dp_meta_objset;
        dsl_dataset_t *ds_prev = NULL;
        uint64_t obj;

        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
        rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
        ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
        rrw_exit(&ds->ds_bp_rwlock, FTAG);
        ASSERT(refcount_is_zero(&ds->ds_longholds));

        if (defer &&
            (ds->ds_userrefs > 0 ||
            dsl_dataset_phys(ds)->ds_num_children > 1)) {
                ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
                dmu_buf_will_dirty(ds->ds_dbuf, tx);
                dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
                spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
                return;
        }

        ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);

        /* We need to log before removing it from the namespace. */
        spa_history_log_internal_ds(ds, "destroy", tx, "");

        dsl_scan_ds_destroyed(ds, tx);

        obj = ds->ds_object;

        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
                if (ds->ds_feature_inuse[f]) {
                        dsl_dataset_deactivate_feature(obj, f, tx);
                        ds->ds_feature_inuse[f] = B_FALSE;
                }
        }
        if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
                ASSERT3P(ds->ds_prev, ==, NULL);
                VERIFY0(dsl_dataset_hold_obj(dp,
                    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
                after_branch_point =
                    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);

                dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
                if (after_branch_point &&
                    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
                        dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
                        if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
                                VERIFY0(zap_add_int(mos,
                                    dsl_dataset_phys(ds_prev)->
                                    ds_next_clones_obj,
                                    dsl_dataset_phys(ds)->ds_next_snap_obj,
                                    tx));
                        }
                }
                if (!after_branch_point) {
                        dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
                            dsl_dataset_phys(ds)->ds_next_snap_obj;
                }
        }

        dsl_dataset_t *ds_next;
        uint64_t old_unique;
        uint64_t used = 0, comp = 0, uncomp = 0;

        VERIFY0(dsl_dataset_hold_obj(dp,
            dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
        ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);

        old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;

        dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
        dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
            dsl_dataset_phys(ds)->ds_prev_snap_obj;
        dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
            dsl_dataset_phys(ds)->ds_prev_snap_txg;
        ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
            ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);

        if (ds_next->ds_deadlist.dl_oldfmt) {
                process_old_deadlist(ds, ds_prev, ds_next,
                    after_branch_point, tx);
        } else {
                /* Adjust prev's unique space. */
                if (ds_prev && !after_branch_point) {
                        dsl_deadlist_space_range(&ds_next->ds_deadlist,
                            dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
                            dsl_dataset_phys(ds)->ds_prev_snap_txg,
                            &used, &comp, &uncomp);
                        dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
                }

                /* Adjust snapused. */
                dsl_deadlist_space_range(&ds_next->ds_deadlist,
                    dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
                    &used, &comp, &uncomp);
                dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
                    -used, -comp, -uncomp, tx);

                /* Move blocks to be freed to pool's free list. */
                dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
                    &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
                    tx);
                dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
                    DD_USED_HEAD, used, comp, uncomp, tx);

                /* Merge our deadlist into next's and free it. */
                dsl_deadlist_merge(&ds_next->ds_deadlist,
                    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
        }
        dsl_deadlist_close(&ds->ds_deadlist);
        dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

        /* Collapse range in clone heads */
        dsl_dataset_remove_clones_key(ds,
            dsl_dataset_phys(ds)->ds_creation_txg, tx);

        if (ds_next->ds_is_snapshot) {
                dsl_dataset_t *ds_nextnext;

                /*
                 * Update next's unique to include blocks which
                 * were previously shared by only this snapshot
                 * and it.  Those blocks will be born after the
                 * prev snap and before this snap, and will have
                 * died after the next snap and before the one
                 * after that (ie. be on the snap after next's
                 * deadlist).
                 */
                VERIFY0(dsl_dataset_hold_obj(dp,
                    dsl_dataset_phys(ds_next)->ds_next_snap_obj,
                    FTAG, &ds_nextnext));
                dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
                    dsl_dataset_phys(ds)->ds_prev_snap_txg,
                    dsl_dataset_phys(ds)->ds_creation_txg,
                    &used, &comp, &uncomp);
                dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
                dsl_dataset_rele(ds_nextnext, FTAG);
                ASSERT3P(ds_next->ds_prev, ==, NULL);

                /* Collapse range in this head. */
                dsl_dataset_t *hds;
                VERIFY0(dsl_dataset_hold_obj(dp,
                    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
                dsl_deadlist_remove_key(&hds->ds_deadlist,
                    dsl_dataset_phys(ds)->ds_creation_txg, tx);
                dsl_dataset_rele(hds, FTAG);

        } else {
                ASSERT3P(ds_next->ds_prev, ==, ds);
                dsl_dataset_rele(ds_next->ds_prev, ds_next);
                ds_next->ds_prev = NULL;
                if (ds_prev) {
                        VERIFY0(dsl_dataset_hold_obj(dp,
                            dsl_dataset_phys(ds)->ds_prev_snap_obj,
                            ds_next, &ds_next->ds_prev));
                }

                dsl_dataset_recalc_head_uniq(ds_next);

                /*
                 * Reduce the amount of our unconsumed refreservation
                 * being charged to our parent by the amount of
                 * new unique data we have gained.
                 */
                if (old_unique < ds_next->ds_reserved) {
                        int64_t mrsdelta;
                        uint64_t new_unique =
                            dsl_dataset_phys(ds_next)->ds_unique_bytes;

                        ASSERT(old_unique <= new_unique);
                        mrsdelta = MIN(new_unique - old_unique,
                            ds_next->ds_reserved - old_unique);
                        dsl_dir_diduse_space(ds->ds_dir,
                            DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
                }
        }
        dsl_dataset_rele(ds_next, FTAG);

        /*
         * This must be done after the dsl_traverse(), because it will
         * re-open the objset.
         */
        if (ds->ds_objset) {
                dmu_objset_evict(ds->ds_objset);
                ds->ds_objset = NULL;
        }

        /* remove from snapshot namespace */
        dsl_dataset_t *ds_head;
        ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
        VERIFY0(dsl_dataset_hold_obj(dp,
            dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
        VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
        {
                uint64_t val;

                err = dsl_dataset_snap_lookup(ds_head,
                    ds->ds_snapname, &val);
                ASSERT0(err);
                ASSERT3U(val, ==, obj);
        }
#endif
        VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
        dsl_dataset_rele(ds_head, FTAG);

        if (ds_prev != NULL)
                dsl_dataset_rele(ds_prev, FTAG);

        spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

        if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
                uint64_t count;
                ASSERT0(zap_count(mos,
                    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
                    count == 0);
                VERIFY0(dmu_object_free(mos,
                    dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
        }
        if (dsl_dataset_phys(ds)->ds_props_obj != 0)
                VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
                    tx));
        if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
                VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
                    tx));
        dsl_dir_rele(ds->ds_dir, ds);
        ds->ds_dir = NULL;
        dmu_object_free_zapified(mos, obj, tx);
}

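/*
 * Sync-task function: destroy every snapshot that passed the check and was
 * recorded in dsda_successful_snaps.
 */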
static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
        dmu_snapshots_destroy_arg_t *dsda = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        nvpair_t *pair;

        for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
            pair != NULL;
            pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
                dsl_dataset_t *ds;

                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));

                dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
                dsl_dataset_rele(ds, FTAG);
        }
}

/*
 * The semantics of this function are described in the comment above
 * lzc_destroy_snaps().  To summarize:
 *
 * The snapshots must all be in the same pool.
 *
 * Snapshots that don't exist will be silently ignored (considered to be
 * "already deleted").
 *
 * On success, all snaps will be destroyed and this will return 0.
 * On failure, no snaps will be destroyed, the errlist will be filled in,
 * and this will return an errno.
 */
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
        dmu_snapshots_destroy_arg_t dsda;
        int error;
        nvpair_t *pair;

        pair = nvlist_next_nvpair(snaps, NULL);
        if (pair == NULL)
                return (0);

        dsda.dsda_snaps = snaps;
        dsda.dsda_successful_snaps = fnvlist_alloc();
        dsda.dsda_defer = defer;
        dsda.dsda_errlist = errlist;

        error = dsl_sync_task(nvpair_name(pair),
            dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
            &dsda, 0, ZFS_SPACE_CHECK_NONE);
        fnvlist_free(dsda.dsda_successful_snaps);

        return (error);
}

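/*
 * Convenience wrapper: destroy (or defer-destroy) a single snapshot by
 * name, discarding the per-snapshot error list.
 */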
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
        int error;
        nvlist_t *nvl = fnvlist_alloc();
        nvlist_t *errlist = fnvlist_alloc();

        fnvlist_add_boolean(nvl, name);
        error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
        fnvlist_free(errlist);
        fnvlist_free(nvl);
        return (error);
}

struct killarg {
        dsl_dataset_t *ds;
        dmu_tx_t *tx;
};

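/*
 * traverse_dataset() callback used by old_synchronous_dataset_destroy():
 * intent-log blocks are freed directly (they carry no accounting), while
 * all other blocks born after the previous snapshot are killed through
 * dsl_dataset_block_kill().
 */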
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
        struct killarg *ka = arg;
        dmu_tx_t *tx = ka->tx;

        if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
                return (0);

        if (zb->zb_level == ZB_ZIL_LEVEL) {
                ASSERT(zilog != NULL);
                /*
                 * It's a block in the intent log.  It has no
                 * accounting, so just free it.
                 */
                dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
        } else {
                ASSERT(zilog == NULL);
                ASSERT3U(bp->blk_birth, >,
                    dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
                (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
        }

        return (0);
}

static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        struct killarg ka;

        /*
         * Free everything that we point to (that's born after
         * the previous snapshot, if we are a clone)
         *
         * NB: this should be very quick, because we already
         * freed all the objects in open context.
         */
        ka.ds = ds;
        ka.tx = tx;
        VERIFY0(traverse_dataset(ds,
            dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
            kill_blkptr, &ka));
        ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
            dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}

typedef struct dsl_destroy_head_arg {
        const char *ddha_name;
} dsl_destroy_head_arg_t;

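/*
 * Check whether a head (non-snapshot) dataset can be destroyed.  Fails with
 * EBUSY if it has unexpected long holds or snapshots of its own, and with
 * EEXIST if it has child filesystems.  If destroying the head would also
 * destroy a deferred-destroy origin snapshot, that origin must not be
 * long-held either.
 */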
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
        int error;
        uint64_t count;
        objset_t *mos;

        ASSERT(!ds->ds_is_snapshot);
        if (ds->ds_is_snapshot)
                return (SET_ERROR(EINVAL));

        if (refcount_count(&ds->ds_longholds) != expected_holds)
                return (SET_ERROR(EBUSY));

        mos = ds->ds_dir->dd_pool->dp_meta_objset;

        /*
         * Can't delete a head dataset if there are snapshots of it.
         * (Except if the only snapshots are from the branch we cloned
         * from.)
         */
        if (ds->ds_prev != NULL &&
            dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
                return (SET_ERROR(EBUSY));

        /*
         * Can't delete if there are children of this fs.
         */
        error = zap_count(mos,
            dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
        if (error != 0)
                return (error);
        if (count != 0)
                return (SET_ERROR(EEXIST));

        if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
            dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
            ds->ds_prev->ds_userrefs == 0) {
                /* We need to remove the origin snapshot as well. */
                if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
                        return (SET_ERROR(EBUSY));
        }
        return (0);
}

static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
        dsl_destroy_head_arg_t *ddha = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;
        int error;

        error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
        if (error != 0)
                return (error);

        error = dsl_destroy_head_check_impl(ds, 0);
        dsl_dataset_rele(ds, FTAG);
        return (error);
}

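/*
 * Tear down the dsl_dir that contained a just-destroyed head dataset:
 * adjust the parent's filesystem count, clear the reservation, verify that
 * all space accounting has drained to zero, and destroy the child, props
 * and delegation ZAPs along with the dir object itself.
 */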
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
        dsl_dir_t *dd;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        objset_t *mos = dp->dp_meta_objset;
        dd_used_t t;

        ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

        VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

        ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

        /*
         * Decrement the filesystem count for all parent filesystems.
         *
         * When we receive an incremental stream into a filesystem that already
         * exists, a temporary clone is created.  We never count this temporary
         * clone, whose name begins with a '%'.
         */
        if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
                dsl_fs_ss_count_adjust(dd->dd_parent, -1,
                    DD_FIELD_FILESYSTEM_COUNT, tx);

        /*
         * Remove our reservation. The impl() routine avoids setting the
         * actual property, which would require the (already destroyed) ds.
         */
        dsl_dir_set_reservation_sync_impl(dd, 0, tx);

        ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
        ASSERT0(dsl_dir_phys(dd)->dd_reserved);
        for (t = 0; t < DD_USED_NUM; t++)
                ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);

        VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
        VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
        VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
        VERIFY0(zap_remove(mos,
            dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
            dd->dd_myname, tx));

        dsl_dir_rele(dd, FTAG);
        dmu_object_free_zapified(mos, ddobj, tx);
}

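/*
 * Destroy a head dataset in syncing context.  Its blocks are either freed
 * synchronously (pools without async_destroy) or handed to the pool's
 * bptree for background freeing; the snapshot-name ZAP, bookmarks and
 * containing dsl_dir are then destroyed, and if this head was the last
 * reason a deferred-destroy origin snapshot was kept, that snapshot is
 * destroyed as well.
 */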
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        dsl_pool_t *dp = dmu_tx_pool(tx);
        objset_t *mos = dp->dp_meta_objset;
        uint64_t obj, ddobj, prevobj = 0;
        boolean_t rmorigin;

        ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
        ASSERT(ds->ds_prev == NULL ||
            dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
        rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
        ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
        rrw_exit(&ds->ds_bp_rwlock, FTAG);
        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

        /* We need to log before removing it from the namespace. */
        spa_history_log_internal_ds(ds, "destroy", tx, "");

        rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
            DS_IS_DEFER_DESTROY(ds->ds_prev) &&
            dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
            ds->ds_prev->ds_userrefs == 0);

        /* Remove our reservation. */
        if (ds->ds_reserved != 0) {
                dsl_dataset_set_refreservation_sync_impl(ds,
                    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
                    0, tx);
                ASSERT0(ds->ds_reserved);
        }

        obj = ds->ds_object;

        for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
                if (ds->ds_feature_inuse[f]) {
                        dsl_dataset_deactivate_feature(obj, f, tx);
                        ds->ds_feature_inuse[f] = B_FALSE;
                }
        }

        dsl_scan_ds_destroyed(ds, tx);

        if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
                /* This is a clone */
                ASSERT(ds->ds_prev != NULL);
                ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
                    obj);
                ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

                dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
                if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
                        dsl_dataset_remove_from_next_clones(ds->ds_prev,
                            obj, tx);
                }

                ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
                dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
        }

        /*
         * Destroy the deadlist.  Unless it's a clone, the
         * deadlist should be empty.  (If it's a clone, it's
         * safe to ignore the deadlist contents.)
         */
        dsl_deadlist_close(&ds->ds_deadlist);
        dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

        objset_t *os;
        VERIFY0(dmu_objset_from_ds(ds, &os));

        if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
                old_synchronous_dataset_destroy(ds, tx);
        } else {
                /*
                 * Move the bptree into the pool's list of trees to
                 * clean up and update space accounting information.
                 */
                uint64_t used, comp, uncomp;

                zil_destroy_sync(dmu_objset_zil(os), tx);

                if (!spa_feature_is_active(dp->dp_spa,
                    SPA_FEATURE_ASYNC_DESTROY)) {
                        dsl_scan_t *scn = dp->dp_scan;
                        spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
                            tx);
                        dp->dp_bptree_obj = bptree_alloc(mos, tx);
                        VERIFY0(zap_add(mos,
                            DMU_POOL_DIRECTORY_OBJECT,
                            DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
                            &dp->dp_bptree_obj, tx));
                        ASSERT(!scn->scn_async_destroying);
                        scn->scn_async_destroying = B_TRUE;
                }

                used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
                comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
                uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;

                ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
                    dsl_dataset_phys(ds)->ds_unique_bytes == used);

                rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
                bptree_add(mos, dp->dp_bptree_obj,
                    &dsl_dataset_phys(ds)->ds_bp,
                    dsl_dataset_phys(ds)->ds_prev_snap_txg,
                    used, comp, uncomp, tx);
                rrw_exit(&ds->ds_bp_rwlock, FTAG);
                dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
                    -used, -comp, -uncomp, tx);
                dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
                    used, comp, uncomp, tx);
        }

        if (ds->ds_prev != NULL) {
                if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
                        VERIFY0(zap_remove_int(mos,
                            dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
                            ds->ds_object, tx));
                }
                prevobj = ds->ds_prev->ds_object;
                dsl_dataset_rele(ds->ds_prev, ds);
                ds->ds_prev = NULL;
        }

        /*
         * This must be done after the dsl_traverse(), because it will
         * re-open the objset.
         */
        if (ds->ds_objset) {
                dmu_objset_evict(ds->ds_objset);
                ds->ds_objset = NULL;
        }

        /* Erase the link in the dir */
        dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
        dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
        ddobj = ds->ds_dir->dd_object;
        ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
        VERIFY0(zap_destroy(mos,
            dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));

        if (ds->ds_bookmarks != 0) {
                VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
                spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
        }

        spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

        ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
        ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
        ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
        dsl_dir_rele(ds->ds_dir, ds);
        ds->ds_dir = NULL;
        dmu_object_free_zapified(mos, obj, tx);

        dsl_dir_destroy_sync(ddobj, tx);

        if (rmorigin) {
                dsl_dataset_t *prev;
                VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
                dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
                dsl_dataset_rele(prev, FTAG);
        }
}

static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
        dsl_destroy_head_arg_t *ddha = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;

        VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
        dsl_destroy_head_sync_impl(ds, tx);
        dsl_dataset_rele(ds, FTAG);
}

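/*
 * First phase of a destroy on pools without async_destroy: mark the
 * dataset inconsistent on disk so that, if we crash before the final sync
 * task, it can later be cleaned up as an inconsistent dataset.
 */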
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
        dsl_destroy_head_arg_t *ddha = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        dsl_dataset_t *ds;

        VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));

        /* Mark it as inconsistent on-disk, in case we crash */
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;

        spa_history_log_internal_ds(ds, "destroy begin", tx, "");
        dsl_dataset_rele(ds, FTAG);
}

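/*
 * Destroy a head dataset by name.  On pools without the async_destroy
 * feature the final destroy happens in a single txg, so the dataset is
 * first marked for destruction and its objects are freed from open context
 * to keep that txg short; with async_destroy the freeing is deferred to
 * the background machinery.
 */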
int
dsl_destroy_head(const char *name)
{
        dsl_destroy_head_arg_t ddha;
        int error;
        spa_t *spa;
        boolean_t isenabled;

#ifdef _KERNEL
        zfs_destroy_unmount_origin(name);
#endif

        error = spa_open(name, &spa, FTAG);
        if (error != 0)
                return (error);
        isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
        spa_close(spa, FTAG);

        ddha.ddha_name = name;

        if (!isenabled) {
                objset_t *os;

                error = dsl_sync_task(name, dsl_destroy_head_check,
                    dsl_destroy_head_begin_sync, &ddha,
                    0, ZFS_SPACE_CHECK_NONE);
                if (error != 0)
                        return (error);

                /*
                 * Head deletion is processed in one txg on old pools;
                 * remove the objects from open context so that the txg sync
                 * is not too long.
                 */
                error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
                if (error == 0) {
                        uint64_t prev_snap_txg =
                            dsl_dataset_phys(dmu_objset_ds(os))->
                            ds_prev_snap_txg;
                        for (uint64_t obj = 0; error == 0;
                            error = dmu_object_next(os, &obj, FALSE,
                            prev_snap_txg))
                                (void) dmu_free_long_object(os, obj);
                        /* sync out all frees */
                        txg_wait_synced(dmu_objset_pool(os), 0);
                        dmu_objset_disown(os, FTAG);
                }
        }

        return (dsl_sync_task(name, dsl_destroy_head_check,
            dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
}

/*
 * Note, this function is used as the callback for dmu_objset_find().  We
 * always return 0 so that we will continue to find and process
 * inconsistent datasets, even if we encounter an error trying to
 * process one of them.
 */
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
        objset_t *os;

        if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
                boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));

                /*
                 * If the dataset is inconsistent because a resumable receive
                 * has failed, then do not destroy it.
                 */
                if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
                        need_destroy = B_FALSE;

                dmu_objset_rele(os, FTAG);
                if (need_destroy)
                        (void) dsl_destroy_head(dsname);
        }
        return (0);
}