1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
25  */
26
27 #include <sys/zfs_context.h>
28 #include <sys/dsl_userhold.h>
29 #include <sys/dsl_dataset.h>
30 #include <sys/dsl_synctask.h>
31 #include <sys/dmu_tx.h>
32 #include <sys/dsl_pool.h>
33 #include <sys/dsl_dir.h>
34 #include <sys/dmu_traverse.h>
35 #include <sys/dsl_scan.h>
36 #include <sys/dmu_objset.h>
37 #include <sys/zap.h>
38 #include <sys/zfeature.h>
39 #include <sys/zfs_ioctl.h>
40 #include <sys/dsl_deleg.h>
41 #include <sys/dmu_impl.h>
42
43 typedef struct dmu_snapshots_destroy_arg {
44         nvlist_t *dsda_snaps;
45         nvlist_t *dsda_successful_snaps;
46         boolean_t dsda_defer;
47         nvlist_t *dsda_errlist;
48 } dmu_snapshots_destroy_arg_t;
49
50 int
51 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
52 {
53         if (!dsl_dataset_is_snapshot(ds))
54                 return (SET_ERROR(EINVAL));
55
56         if (dsl_dataset_long_held(ds))
57                 return (SET_ERROR(EBUSY));
58
59         /*
60          * Only allow deferred destroy on pools that support it.
61          * NOTE: deferred destroy is only supported on snapshots.
62          */
63         if (defer) {
64                 if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
65                     SPA_VERSION_USERREFS)
66                         return (SET_ERROR(ENOTSUP));
67                 return (0);
68         }
69
70         /*
71          * If this snapshot has an elevated user reference count,
72          * we can't destroy it yet.
73          */
74         if (ds->ds_userrefs > 0)
75                 return (SET_ERROR(EBUSY));
76
77         /*
78          * Can't delete a branch point.
79          */
80         if (ds->ds_phys->ds_num_children > 1)
81                 return (SET_ERROR(EEXIST));
82
83         return (0);
84 }
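
/*
 * Illustrative sketch (not compiled): the errno contract enforced by
 * dsl_destroy_snapshot_check_impl() above, seen from a caller's point of
 * view.  The helper name and message strings are invented for this
 * example and do not exist elsewhere in the tree.
 */
#if 0
static const char *
example_destroy_check_errstr(int err)
{
        switch (err) {
        case EINVAL:
                return ("not a snapshot");
        case EBUSY:
                return ("long hold or user reference outstanding");
        case ENOTSUP:
                return ("pool version too old for deferred destroy");
        case EEXIST:
                return ("snapshot is a branch point (has a clone)");
        default:
                return ("no error");
        }
}
#endif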
85
86 static int
87 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
88 {
89         dmu_snapshots_destroy_arg_t *dsda = arg;
90         dsl_pool_t *dp = dmu_tx_pool(tx);
91         nvpair_t *pair;
92         int error = 0;
93
94         if (!dmu_tx_is_syncing(tx))
95                 return (0);
96
97         for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
98             pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
99                 dsl_dataset_t *ds;
100
101                 error = dsl_dataset_hold(dp, nvpair_name(pair),
102                     FTAG, &ds);
103
104                 /*
105                  * If the snapshot does not exist, silently ignore it
106                  * (it's "already destroyed").
107                  */
108                 if (error == ENOENT)
109                         continue;
110
111                 if (error == 0) {
112                         error = dsl_destroy_snapshot_check_impl(ds,
113                             dsda->dsda_defer);
114                         dsl_dataset_rele(ds, FTAG);
115                 }
116
117                 if (error == 0) {
118                         fnvlist_add_boolean(dsda->dsda_successful_snaps,
119                             nvpair_name(pair));
120                 } else {
121                         fnvlist_add_int32(dsda->dsda_errlist,
122                             nvpair_name(pair), error);
123                 }
124         }
125
126         pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
127         if (pair != NULL)
128                 return (fnvpair_value_int32(pair));
129
130         return (0);
131 }
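
/*
 * Illustrative sketch (not compiled): one way a consumer could walk the
 * errlist that the check above populates, reporting every snapshot that
 * failed its check rather than only the first errno returned.  The helper
 * is hypothetical; real consumers (e.g. the zfs_ioc_destroy_snaps ioctl
 * handler) forward the errlist to userland instead.
 */
#if 0
static void
example_report_errlist(nvlist_t *errlist)
{
        nvpair_t *pair;

        for (pair = nvlist_next_nvpair(errlist, NULL); pair != NULL;
            pair = nvlist_next_nvpair(errlist, pair)) {
                cmn_err(CE_NOTE, "cannot destroy %s: error %d",
                    nvpair_name(pair), fnvpair_value_int32(pair));
        }
}
#endif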
132
133 struct process_old_arg {
134         dsl_dataset_t *ds;
135         dsl_dataset_t *ds_prev;
136         boolean_t after_branch_point;
137         zio_t *pio;
138         uint64_t used, comp, uncomp;
139 };
140
141 static int
142 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
143 {
144         struct process_old_arg *poa = arg;
145         dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
146
147         ASSERT(!BP_IS_HOLE(bp));
148
149         if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
150                 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
151                 if (poa->ds_prev && !poa->after_branch_point &&
152                     bp->blk_birth >
153                     poa->ds_prev->ds_phys->ds_prev_snap_txg) {
154                         poa->ds_prev->ds_phys->ds_unique_bytes +=
155                             bp_get_dsize_sync(dp->dp_spa, bp);
156                 }
157         } else {
158                 poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
159                 poa->comp += BP_GET_PSIZE(bp);
160                 poa->uncomp += BP_GET_UCSIZE(bp);
161                 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
162         }
163         return (0);
164 }
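
/*
 * Illustrative sketch (not compiled): the classification rule applied by
 * process_old_cb() above, expressed as a stand-alone predicate.  B_TRUE
 * means the block was born no later than the destroyed snapshot's
 * previous snapshot, so it may still be referenced by an older snapshot
 * and must stay on a deadlist; B_FALSE means it was born during the
 * destroyed snapshot's lifetime (and has already died), so it was unique
 * to that snapshot and can be freed now.  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_block_predates_snapshot(const blkptr_t *bp, dsl_dataset_t *ds)
{
        return (bp->blk_birth <= ds->ds_phys->ds_prev_snap_txg);
}
#endif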
165
166 static void
167 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
168     dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
169 {
170         struct process_old_arg poa = { 0 };
171         dsl_pool_t *dp = ds->ds_dir->dd_pool;
172         objset_t *mos = dp->dp_meta_objset;
173         uint64_t deadlist_obj;
174
175         ASSERT(ds->ds_deadlist.dl_oldfmt);
176         ASSERT(ds_next->ds_deadlist.dl_oldfmt);
177
178         poa.ds = ds;
179         poa.ds_prev = ds_prev;
180         poa.after_branch_point = after_branch_point;
181         poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
182         VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
183             process_old_cb, &poa, tx));
184         VERIFY0(zio_wait(poa.pio));
185         ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
186
187         /* change snapused */
188         dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
189             -poa.used, -poa.comp, -poa.uncomp, tx);
190
191         /* swap next's deadlist to our deadlist */
192         dsl_deadlist_close(&ds->ds_deadlist);
193         dsl_deadlist_close(&ds_next->ds_deadlist);
194         deadlist_obj = ds->ds_phys->ds_deadlist_obj;
195         ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
196         ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
197         dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
198         dsl_deadlist_open(&ds_next->ds_deadlist, mos,
199             ds_next->ds_phys->ds_deadlist_obj);
200 }
201
202 static void
203 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
204 {
205         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
206         zap_cursor_t *zc;
207         zap_attribute_t *za;
208
209         /*
210          * If it is the old version, dd_clones doesn't exist so we can't
211          * find the clones, but dsl_deadlist_remove_key() is a no-op so it
212          * doesn't matter.
213          */
214         if (ds->ds_dir->dd_phys->dd_clones == 0)
215                 return;
216
217         zc = kmem_alloc(sizeof (zap_cursor_t), KM_PUSHPAGE);
218         za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE);
219
220         for (zap_cursor_init(zc, mos, ds->ds_dir->dd_phys->dd_clones);
221             zap_cursor_retrieve(zc, za) == 0;
222             zap_cursor_advance(zc)) {
223                 dsl_dataset_t *clone;
224
225                 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
226                     za->za_first_integer, FTAG, &clone));
227                 if (clone->ds_dir->dd_origin_txg > mintxg) {
228                         dsl_deadlist_remove_key(&clone->ds_deadlist,
229                             mintxg, tx);
230                         dsl_dataset_remove_clones_key(clone, mintxg, tx);
231                 }
232                 dsl_dataset_rele(clone, FTAG);
233         }
234         zap_cursor_fini(zc);
235
236         kmem_free(za, sizeof (zap_attribute_t));
237         kmem_free(zc, sizeof (zap_cursor_t));
238 }
239
240 void
241 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
242 {
243 #ifdef ZFS_DEBUG
244         int err;
245 #endif
246         int after_branch_point = FALSE;
247         dsl_pool_t *dp = ds->ds_dir->dd_pool;
248         objset_t *mos = dp->dp_meta_objset;
249         dsl_dataset_t *ds_prev = NULL;
250         uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0;
251         dsl_dataset_t *ds_next, *ds_head, *hds;
252
253
254         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
255         ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
256         ASSERT(refcount_is_zero(&ds->ds_longholds));
257
258         if (defer &&
259             (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
260                 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
261                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
262                 ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
263                 spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
264                 return;
265         }
266
267         ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
268
269         /* We need to log before removing it from the namespace. */
270         spa_history_log_internal_ds(ds, "destroy", tx, "");
271
272         dsl_scan_ds_destroyed(ds, tx);
273
274         obj = ds->ds_object;
275
276         if (ds->ds_phys->ds_prev_snap_obj != 0) {
277                 ASSERT3P(ds->ds_prev, ==, NULL);
278                 VERIFY0(dsl_dataset_hold_obj(dp,
279                     ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
280                 after_branch_point =
281                     (ds_prev->ds_phys->ds_next_snap_obj != obj);
282
283                 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
284                 if (after_branch_point &&
285                     ds_prev->ds_phys->ds_next_clones_obj != 0) {
286                         dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
287                         if (ds->ds_phys->ds_next_snap_obj != 0) {
288                                 VERIFY0(zap_add_int(mos,
289                                     ds_prev->ds_phys->ds_next_clones_obj,
290                                     ds->ds_phys->ds_next_snap_obj, tx));
291                         }
292                 }
293                 if (!after_branch_point) {
294                         ds_prev->ds_phys->ds_next_snap_obj =
295                             ds->ds_phys->ds_next_snap_obj;
296                 }
297         }
298
299         VERIFY0(dsl_dataset_hold_obj(dp,
300             ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
301         ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
302
303         old_unique = ds_next->ds_phys->ds_unique_bytes;
304
305         dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
306         ds_next->ds_phys->ds_prev_snap_obj =
307             ds->ds_phys->ds_prev_snap_obj;
308         ds_next->ds_phys->ds_prev_snap_txg =
309             ds->ds_phys->ds_prev_snap_txg;
310         ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
311             ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
312
313         if (ds_next->ds_deadlist.dl_oldfmt) {
314                 process_old_deadlist(ds, ds_prev, ds_next,
315                     after_branch_point, tx);
316         } else {
317                 /* Adjust prev's unique space. */
318                 if (ds_prev && !after_branch_point) {
319                         dsl_deadlist_space_range(&ds_next->ds_deadlist,
320                             ds_prev->ds_phys->ds_prev_snap_txg,
321                             ds->ds_phys->ds_prev_snap_txg,
322                             &used, &comp, &uncomp);
323                         ds_prev->ds_phys->ds_unique_bytes += used;
324                 }
325
326                 /* Adjust snapused. */
327                 dsl_deadlist_space_range(&ds_next->ds_deadlist,
328                     ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
329                     &used, &comp, &uncomp);
330                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
331                     -used, -comp, -uncomp, tx);
332
333                 /* Move blocks to be freed to pool's free list. */
334                 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
335                     &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
336                     tx);
337                 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
338                     DD_USED_HEAD, used, comp, uncomp, tx);
339
340                 /* Merge our deadlist into next's and free it. */
341                 dsl_deadlist_merge(&ds_next->ds_deadlist,
342                     ds->ds_phys->ds_deadlist_obj, tx);
343         }
344         dsl_deadlist_close(&ds->ds_deadlist);
345         dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
346         dmu_buf_will_dirty(ds->ds_dbuf, tx);
347         ds->ds_phys->ds_deadlist_obj = 0;
348
349         /* Collapse range in clone heads */
350         dsl_dataset_remove_clones_key(ds,
351             ds->ds_phys->ds_creation_txg, tx);
352
353         if (dsl_dataset_is_snapshot(ds_next)) {
354                 dsl_dataset_t *ds_nextnext;
355
356                 /*
357                  * Update next's unique to include blocks which
358                  * were previously shared by only this snapshot
359                  * and it.  Those blocks will be born after the
360                  * prev snap and before this snap, and will have
361                  * died after the next snap and before the one
362                  * after that (ie. be on the snap after next's
363                  * deadlist).
364                  */
365                 VERIFY0(dsl_dataset_hold_obj(dp,
366                     ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
367                 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
368                     ds->ds_phys->ds_prev_snap_txg,
369                     ds->ds_phys->ds_creation_txg,
370                     &used, &comp, &uncomp);
371                 ds_next->ds_phys->ds_unique_bytes += used;
372                 dsl_dataset_rele(ds_nextnext, FTAG);
373                 ASSERT3P(ds_next->ds_prev, ==, NULL);
374
375                 /* Collapse range in this head. */
376                 VERIFY0(dsl_dataset_hold_obj(dp,
377                     ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
378                 dsl_deadlist_remove_key(&hds->ds_deadlist,
379                     ds->ds_phys->ds_creation_txg, tx);
380                 dsl_dataset_rele(hds, FTAG);
381
382         } else {
383                 ASSERT3P(ds_next->ds_prev, ==, ds);
384                 dsl_dataset_rele(ds_next->ds_prev, ds_next);
385                 ds_next->ds_prev = NULL;
386                 if (ds_prev) {
387                         VERIFY0(dsl_dataset_hold_obj(dp,
388                             ds->ds_phys->ds_prev_snap_obj,
389                             ds_next, &ds_next->ds_prev));
390                 }
391
392                 dsl_dataset_recalc_head_uniq(ds_next);
393
394                 /*
395                  * Reduce the amount of our unconsumed refreservation
396                  * being charged to our parent by the amount of
397                  * new unique data we have gained.
398                  */
399                 if (old_unique < ds_next->ds_reserved) {
400                         int64_t mrsdelta;
401                         uint64_t new_unique =
402                             ds_next->ds_phys->ds_unique_bytes;
403
404                         ASSERT(old_unique <= new_unique);
405                         mrsdelta = MIN(new_unique - old_unique,
406                             ds_next->ds_reserved - old_unique);
407                         dsl_dir_diduse_space(ds->ds_dir,
408                             DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
409                 }
410         }
411         dsl_dataset_rele(ds_next, FTAG);
412
413         /*
414          * This must be done after the dsl_traverse(), because it will
415          * re-open the objset.
416          */
417         if (ds->ds_objset) {
418                 dmu_objset_evict(ds->ds_objset);
419                 ds->ds_objset = NULL;
420         }
421
422         /* remove from snapshot namespace */
423         ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
424         VERIFY0(dsl_dataset_hold_obj(dp,
425             ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
426         VERIFY0(dsl_dataset_get_snapname(ds));
427 #ifdef ZFS_DEBUG
428         {
429                 uint64_t val;
430
431                 err = dsl_dataset_snap_lookup(ds_head,
432                     ds->ds_snapname, &val);
433                 ASSERT0(err);
434                 ASSERT3U(val, ==, obj);
435         }
436 #endif
437         VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
438         dsl_dataset_rele(ds_head, FTAG);
439
440         if (ds_prev != NULL)
441                 dsl_dataset_rele(ds_prev, FTAG);
442
443         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
444
445         if (ds->ds_phys->ds_next_clones_obj != 0) {
446                 ASSERTV(uint64_t count);
447                 ASSERT0(zap_count(mos,
448                     ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
449                 VERIFY0(dmu_object_free(mos,
450                     ds->ds_phys->ds_next_clones_obj, tx));
451         }
452         if (ds->ds_phys->ds_props_obj != 0)
453                 VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
454         if (ds->ds_phys->ds_userrefs_obj != 0)
455                 VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
456         dsl_dir_rele(ds->ds_dir, ds);
457         ds->ds_dir = NULL;
458         dmu_object_free_zapified(mos, obj, tx);
459 }
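
/*
 * Illustrative sketch (not compiled): the condition near the top of
 * dsl_destroy_snapshot_sync_impl() under which the destroy is only
 * deferred -- the snapshot is tagged with DS_FLAG_DEFER_DESTROY and left
 * in place until its last user hold or clone goes away -- rather than
 * carried out in this txg.  The helper name is hypothetical.
 */
#if 0
static boolean_t
example_destroy_is_deferred(dsl_dataset_t *ds, boolean_t defer)
{
        return (defer &&
            (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
}
#endif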
460
461 static void
462 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
463 {
464         dmu_snapshots_destroy_arg_t *dsda = arg;
465         dsl_pool_t *dp = dmu_tx_pool(tx);
466         nvpair_t *pair;
467
468         for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
469             pair != NULL;
470             pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
471                 dsl_dataset_t *ds;
472
473                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
474
475                 dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
476                 dsl_dataset_rele(ds, FTAG);
477         }
478 }
479
480 /*
481  * The semantics of this function are described in the comment above
482  * lzc_destroy_snaps().  To summarize:
483  *
484  * The snapshots must all be in the same pool.
485  *
486  * Snapshots that don't exist will be silently ignored (considered to be
487  * "already deleted").
488  *
489  * On success, all snaps will be destroyed and this will return 0.
490  * On failure, no snaps will be destroyed, the errlist will be filled in,
491  * and this will return an errno.
492  */
493 int
494 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
495     nvlist_t *errlist)
496 {
497         dmu_snapshots_destroy_arg_t dsda;
498         int error;
499         nvpair_t *pair;
500
501         pair = nvlist_next_nvpair(snaps, NULL);
502         if (pair == NULL)
503                 return (0);
504
505         dsda.dsda_snaps = snaps;
506         VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps,
507             NV_UNIQUE_NAME, KM_PUSHPAGE));
508         dsda.dsda_defer = defer;
509         dsda.dsda_errlist = errlist;
510
511         error = dsl_sync_task(nvpair_name(pair),
512             dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
513             &dsda, 0);
514         fnvlist_free(dsda.dsda_successful_snaps);
515
516         return (error);
517 }
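
/*
 * Illustrative sketch (not compiled): building the snaps nvlist and
 * driving dsl_destroy_snapshots_nvl() roughly the way lzc_destroy_snaps()
 * does from userland.  Snapshot names are added as boolean (valueless)
 * entries; errors, if any, come back keyed by snapshot name in the
 * errlist.  The snapshot names used here are made up for the example.
 */
#if 0
static int
example_destroy_two_snaps(void)
{
        nvlist_t *snaps = fnvlist_alloc();
        nvlist_t *errlist = fnvlist_alloc();
        int error;

        fnvlist_add_boolean(snaps, "tank/fs@monday");
        fnvlist_add_boolean(snaps, "tank/fs@tuesday");

        /* defer == B_FALSE: destroy nothing if any snapshot is busy */
        error = dsl_destroy_snapshots_nvl(snaps, B_FALSE, errlist);

        fnvlist_free(errlist);
        fnvlist_free(snaps);
        return (error);
}
#endif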
518
519 int
520 dsl_destroy_snapshot(const char *name, boolean_t defer)
521 {
522         int error;
523         nvlist_t *nvl;
524         nvlist_t *errlist;
525
526         VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE));
527         VERIFY0(nvlist_alloc(&errlist, NV_UNIQUE_NAME, KM_PUSHPAGE));
528
529         fnvlist_add_boolean(nvl, name);
530         error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
531         fnvlist_free(errlist);
532         fnvlist_free(nvl);
533         return (error);
534 }
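
/*
 * Illustrative sketch (not compiled): requesting a deferred destroy
 * through the single-snapshot wrapper above.  With defer == B_TRUE the
 * call succeeds even while clones or user holds keep the snapshot alive;
 * the snapshot is merely flagged, and its space is reclaimed once the
 * last hold or clone is released.  The dataset name is made up.
 */
#if 0
static int
example_defer_destroy(void)
{
        return (dsl_destroy_snapshot("tank/fs@backup", B_TRUE));
}
#endif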
535
536 struct killarg {
537         dsl_dataset_t *ds;
538         dmu_tx_t *tx;
539 };
540
541 /* ARGSUSED */
542 static int
543 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
544     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
545 {
546         struct killarg *ka = arg;
547         dmu_tx_t *tx = ka->tx;
548
549         if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
550                 return (0);
551
552         if (zb->zb_level == ZB_ZIL_LEVEL) {
553                 ASSERT(zilog != NULL);
554                 /*
555                  * It's a block in the intent log.  It has no
556                  * accounting, so just free it.
557                  */
558                 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
559         } else {
560                 ASSERT(zilog == NULL);
561                 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
562                 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
563         }
564
565         return (0);
566 }
567
568 static void
569 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
570 {
571         struct killarg ka;
572
573         /*
574          * Free everything that we point to (that's born after
575          * the previous snapshot, if we are a clone)
576          *
577          * NB: this should be very quick, because we already
578          * freed all the objects in open context.
579          */
580         ka.ds = ds;
581         ka.tx = tx;
582         VERIFY0(traverse_dataset(ds,
583             ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
584             kill_blkptr, &ka));
585         ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
586 }
587
588 typedef struct dsl_destroy_head_arg {
589         const char *ddha_name;
590 } dsl_destroy_head_arg_t;
591
592 int
593 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
594 {
595         int error;
596         uint64_t count;
597         objset_t *mos;
598
599         ASSERT(!dsl_dataset_is_snapshot(ds));
600         if (dsl_dataset_is_snapshot(ds))
601                 return (SET_ERROR(EINVAL));
602
603         if (refcount_count(&ds->ds_longholds) != expected_holds)
604                 return (SET_ERROR(EBUSY));
605
606         mos = ds->ds_dir->dd_pool->dp_meta_objset;
607
608         /*
609          * Can't delete a head dataset if there are snapshots of it.
610          * (Except if the only snapshots are from the branch we cloned
611          * from.)
612          */
613         if (ds->ds_prev != NULL &&
614             ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
615                 return (SET_ERROR(EBUSY));
616
617         /*
618          * Can't delete if there are children of this fs.
619          */
620         error = zap_count(mos,
621             ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
622         if (error != 0)
623                 return (error);
624         if (count != 0)
625                 return (SET_ERROR(EEXIST));
626
627         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
628             ds->ds_prev->ds_phys->ds_num_children == 2 &&
629             ds->ds_prev->ds_userrefs == 0) {
630                 /* We need to remove the origin snapshot as well. */
631                 if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
632                         return (SET_ERROR(EBUSY));
633         }
634         return (0);
635 }
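
/*
 * Illustrative sketch (not compiled): the "remove the origin too"
 * condition tested above and recomputed in dsl_destroy_head_sync_impl(),
 * written as a single predicate.  A clone takes its origin snapshot with
 * it only when that origin is already marked for deferred destroy, is
 * kept alive solely by this clone plus the dataset it was snapped from
 * (ds_num_children == 2), and has no user holds.  The helper name is
 * hypothetical.
 */
#if 0
static boolean_t
example_should_remove_origin(dsl_dataset_t *ds)
{
        return (dsl_dir_is_clone(ds->ds_dir) &&
            DS_IS_DEFER_DESTROY(ds->ds_prev) &&
            ds->ds_prev->ds_phys->ds_num_children == 2 &&
            ds->ds_prev->ds_userrefs == 0);
}
#endif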
636
637 static int
638 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
639 {
640         dsl_destroy_head_arg_t *ddha = arg;
641         dsl_pool_t *dp = dmu_tx_pool(tx);
642         dsl_dataset_t *ds;
643         int error;
644
645         error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
646         if (error != 0)
647                 return (error);
648
649         error = dsl_destroy_head_check_impl(ds, 0);
650         dsl_dataset_rele(ds, FTAG);
651         return (error);
652 }
653
654 static void
655 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
656 {
657         dsl_dir_t *dd;
658         dsl_pool_t *dp = dmu_tx_pool(tx);
659         objset_t *mos = dp->dp_meta_objset;
660         dd_used_t t;
661
662         ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
663
664         VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
665
666         ASSERT0(dd->dd_phys->dd_head_dataset_obj);
667
668         /*
669          * Remove our reservation. The impl() routine avoids setting the
670          * actual property, which would require the (already destroyed) ds.
671          */
672         dsl_dir_set_reservation_sync_impl(dd, 0, tx);
673
674         ASSERT0(dd->dd_phys->dd_used_bytes);
675         ASSERT0(dd->dd_phys->dd_reserved);
676         for (t = 0; t < DD_USED_NUM; t++)
677                 ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
678
679         VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
680         VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
681         VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
682         VERIFY0(zap_remove(mos,
683             dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
684
685         dsl_dir_rele(dd, FTAG);
686         dmu_object_free_zapified(mos, ddobj, tx);
687 }
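
/*
 * Illustrative sketch (not compiled): the state dsl_dir_destroy_sync()
 * asserts before freeing the dsl_dir -- no head dataset left, no space
 * charged to it, and no reservation -- collected into one hypothetical
 * predicate.
 */
#if 0
static boolean_t
example_dsl_dir_is_empty(dsl_dir_t *dd)
{
        dd_used_t t;

        if (dd->dd_phys->dd_head_dataset_obj != 0 ||
            dd->dd_phys->dd_used_bytes != 0 ||
            dd->dd_phys->dd_reserved != 0)
                return (B_FALSE);

        for (t = 0; t < DD_USED_NUM; t++) {
                if (dd->dd_phys->dd_used_breakdown[t] != 0)
                        return (B_FALSE);
        }

        return (B_TRUE);
}
#endif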
688
689 void
690 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
691 {
692         dsl_pool_t *dp = dmu_tx_pool(tx);
693         objset_t *mos = dp->dp_meta_objset;
694         uint64_t obj, ddobj, prevobj = 0;
695         boolean_t rmorigin;
696         objset_t *os;
697
698         ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
699         ASSERT(ds->ds_prev == NULL ||
700             ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
701         ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
702         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
703
704         /* We need to log before removing it from the namespace. */
705         spa_history_log_internal_ds(ds, "destroy", tx, "");
706
707         rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
708             DS_IS_DEFER_DESTROY(ds->ds_prev) &&
709             ds->ds_prev->ds_phys->ds_num_children == 2 &&
710             ds->ds_prev->ds_userrefs == 0);
711
712         /* Remove our reservation. */
713         if (ds->ds_reserved != 0) {
714                 dsl_dataset_set_refreservation_sync_impl(ds,
715                     (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
716                     0, tx);
717                 ASSERT0(ds->ds_reserved);
718         }
719
720         dsl_scan_ds_destroyed(ds, tx);
721
722         obj = ds->ds_object;
723
724         if (ds->ds_phys->ds_prev_snap_obj != 0) {
725                 /* This is a clone */
726                 ASSERT(ds->ds_prev != NULL);
727                 ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
728                 ASSERT0(ds->ds_phys->ds_next_snap_obj);
729
730                 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
731                 if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
732                         dsl_dataset_remove_from_next_clones(ds->ds_prev,
733                             obj, tx);
734                 }
735
736                 ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
737                 ds->ds_prev->ds_phys->ds_num_children--;
738         }
739
740         /*
741          * Destroy the deadlist.  Unless it's a clone, the
742          * deadlist should be empty.  (If it's a clone, it's
743          * safe to ignore the deadlist contents.)
744          */
745         dsl_deadlist_close(&ds->ds_deadlist);
746         dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
747         dmu_buf_will_dirty(ds->ds_dbuf, tx);
748         ds->ds_phys->ds_deadlist_obj = 0;
749
750         VERIFY0(dmu_objset_from_ds(ds, &os));
751
752         if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
753                 old_synchronous_dataset_destroy(ds, tx);
754         } else {
755                 /*
756                  * Move the bptree into the pool's list of trees to
757                  * clean up and update space accounting information.
758                  */
759                 uint64_t used, comp, uncomp;
760
761                 zil_destroy_sync(dmu_objset_zil(os), tx);
762
763                 if (!spa_feature_is_active(dp->dp_spa,
764                     SPA_FEATURE_ASYNC_DESTROY)) {
765                         dsl_scan_t *scn = dp->dp_scan;
766                         spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
767                             tx);
768                         dp->dp_bptree_obj = bptree_alloc(mos, tx);
769                         VERIFY0(zap_add(mos,
770                             DMU_POOL_DIRECTORY_OBJECT,
771                             DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
772                             &dp->dp_bptree_obj, tx));
773                         ASSERT(!scn->scn_async_destroying);
774                         scn->scn_async_destroying = B_TRUE;
775                 }
776
777                 used = ds->ds_dir->dd_phys->dd_used_bytes;
778                 comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
779                 uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
780
781                 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
782                     ds->ds_phys->ds_unique_bytes == used);
783
784                 bptree_add(mos, dp->dp_bptree_obj,
785                     &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
786                     used, comp, uncomp, tx);
787                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
788                     -used, -comp, -uncomp, tx);
789                 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
790                     used, comp, uncomp, tx);
791         }
792
793         if (ds->ds_prev != NULL) {
794                 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
795                         VERIFY0(zap_remove_int(mos,
796                             ds->ds_prev->ds_dir->dd_phys->dd_clones,
797                             ds->ds_object, tx));
798                 }
799                 prevobj = ds->ds_prev->ds_object;
800                 dsl_dataset_rele(ds->ds_prev, ds);
801                 ds->ds_prev = NULL;
802         }
803
804         /*
805          * This must be done after the dsl_traverse(), because it will
806          * re-open the objset.
807          */
808         if (ds->ds_objset) {
809                 dmu_objset_evict(ds->ds_objset);
810                 ds->ds_objset = NULL;
811         }
812
813         /* Erase the link in the dir */
814         dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
815         ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
816         ddobj = ds->ds_dir->dd_object;
817         ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
818         VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));
819
820         if (ds->ds_bookmarks != 0) {
821                 VERIFY0(zap_destroy(mos,
822                     ds->ds_bookmarks, tx));
823                 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
824         }
825
826         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
827
828         ASSERT0(ds->ds_phys->ds_next_clones_obj);
829         ASSERT0(ds->ds_phys->ds_props_obj);
830         ASSERT0(ds->ds_phys->ds_userrefs_obj);
831         dsl_dir_rele(ds->ds_dir, ds);
832         ds->ds_dir = NULL;
833         dmu_object_free_zapified(mos, obj, tx);
834
835         dsl_dir_destroy_sync(ddobj, tx);
836
837         if (rmorigin) {
838                 dsl_dataset_t *prev;
839                 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
840                 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
841                 dsl_dataset_rele(prev, FTAG);
842         }
843 }
844
845 static void
846 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
847 {
848         dsl_destroy_head_arg_t *ddha = arg;
849         dsl_pool_t *dp = dmu_tx_pool(tx);
850         dsl_dataset_t *ds;
851
852         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
853         dsl_destroy_head_sync_impl(ds, tx);
854         dsl_dataset_rele(ds, FTAG);
855 }
856
857 static void
858 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
859 {
860         dsl_destroy_head_arg_t *ddha = arg;
861         dsl_pool_t *dp = dmu_tx_pool(tx);
862         dsl_dataset_t *ds;
863
864         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
865
866         /* Mark it as inconsistent on-disk, in case we crash */
867         dmu_buf_will_dirty(ds->ds_dbuf, tx);
868         ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
869
870         spa_history_log_internal_ds(ds, "destroy begin", tx, "");
871         dsl_dataset_rele(ds, FTAG);
872 }
873
874 int
875 dsl_destroy_head(const char *name)
876 {
877         dsl_destroy_head_arg_t ddha;
878         int error;
879         spa_t *spa;
880         boolean_t isenabled;
881
882 #ifdef _KERNEL
883         zfs_destroy_unmount_origin(name);
884 #endif
885
886         error = spa_open(name, &spa, FTAG);
887         if (error != 0)
888                 return (error);
889         isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
890         spa_close(spa, FTAG);
891
892         ddha.ddha_name = name;
893
894         if (!isenabled) {
895                 objset_t *os;
896
897                 error = dsl_sync_task(name, dsl_destroy_head_check,
898                     dsl_destroy_head_begin_sync, &ddha, 0);
899                 if (error != 0)
900                         return (error);
901
902                 /*
903                  * Head deletion is processed in one txg on old pools;
904                  * remove the objects from open context so that the txg sync
905                  * is not too long.
906                  */
907                 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
908                 if (error == 0) {
909                         uint64_t obj;
910                         uint64_t prev_snap_txg =
911                             dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
912                         for (obj = 0; error == 0;
913                             error = dmu_object_next(os, &obj, FALSE,
914                             prev_snap_txg))
915                                 (void) dmu_free_long_object(os, obj);
916                         /* sync out all frees */
917                         txg_wait_synced(dmu_objset_pool(os), 0);
918                         dmu_objset_disown(os, FTAG);
919                 }
920         }
921
922         return (dsl_sync_task(name, dsl_destroy_head_check,
923             dsl_destroy_head_sync, &ddha, 0));
924 }
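
/*
 * Illustrative sketch (not compiled): how a caller such as the destroy
 * ioctl path is expected to choose between the two entry points in this
 * file -- dsl_destroy_snapshot() for names containing '@' and
 * dsl_destroy_head() for filesystems and volumes.  Unmount and hold
 * handling is omitted; the helper is hypothetical.
 */
#if 0
static int
example_destroy_by_name(const char *name, boolean_t defer)
{
        if (strchr(name, '@') != NULL)
                return (dsl_destroy_snapshot(name, defer));

        /* deferred destroy only applies to snapshots */
        return (dsl_destroy_head(name));
}
#endif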
925
926 /*
927  * Note, this function is used as the callback for dmu_objset_find().  We
928  * always return 0 so that we will continue to find and process
929  * inconsistent datasets, even if we encounter an error trying to
930  * process one of them.
931  */
932 /* ARGSUSED */
933 int
934 dsl_destroy_inconsistent(const char *dsname, void *arg)
935 {
936         objset_t *os;
937
938         if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
939                 boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
940                 dmu_objset_rele(os, FTAG);
941                 if (inconsistent)
942                         (void) dsl_destroy_head(dsname);
943         }
944         return (0);
945 }
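
/*
 * Illustrative sketch (not compiled): driving dsl_destroy_inconsistent()
 * through dmu_objset_find(), which is roughly how partially-received or
 * half-destroyed datasets get cleaned up after a pool is imported.  The
 * DS_FIND_CHILDREN flag recurses over all descendant filesystems; the
 * wrapper itself is hypothetical.
 */
#if 0
static void
example_cleanup_inconsistent(char *poolname)
{
        (void) dmu_objset_find(poolname, dsl_destroy_inconsistent,
            NULL, DS_FIND_CHILDREN);
}
#endif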
946
947
948 #if defined(_KERNEL) && defined(HAVE_SPL)
949 EXPORT_SYMBOL(dsl_destroy_head);
950 EXPORT_SYMBOL(dsl_destroy_head_sync_impl);
951 EXPORT_SYMBOL(dsl_dataset_user_hold_check_one);
952 EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl);
953 EXPORT_SYMBOL(dsl_destroy_inconsistent);
954 EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
955 EXPORT_SYMBOL(dsl_destroy_head_check_impl);
956 #endif