4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * Iterate over all children of the current object. This includes the normal
30 * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
31 * walk all datasets in the pool, and construct a directed graph of the form:
41 * @yesterday ----> foo
43 * In order to construct this graph, we have to walk every dataset in the pool,
44 * because the clone parent is stored as a property of the child, not the
45 * parent. The parent only keeps track of the number of clones.
47 * In the normal case (without clones) this would be rather expensive. To avoid
48 * unnecessary computation, we first try a walk of the subtree hierarchy
49 * starting from the initial node. At each dataset, we construct a node in the
50 * graph and an edge leading from its parent. If we don't see any snapshots
51 * with a non-zero clone count, then we are finished.
53 * If we do find a cloned snapshot, then we finish the walk of the current
54 * subtree, but indicate that we need to do a complete walk. We then perform a
55 * global walk of all datasets, avoiding the subtree we already processed.
57 * At the end of this, we'll end up with a directed graph of all relevant (and
58 * possibly some irrelevant) datasets in the system. We need to both find our
59 * limiting subgraph and determine a safe ordering in which to destroy the
60 * datasets. We do a topological ordering of our graph starting at our target
61 * dataset, and then walk the results in reverse.
63 * It's possible for the graph to have cycles if, for example, the user renames
64 * a clone to be the parent of its origin snapshot. The user can request to
65 * generate an error in this case, or ignore the cycle and continue.
67 * When removing datasets, we want to destroy the snapshots in chronological
68 * order (because this is the most efficient method). In order to accomplish
69 * this, we store the creation transaction group with each vertex and keep each
70 * vertex's edges sorted according to this value. The topological sort will
71 * automatically walk the snapshots in the correct order.
84 #include "libzfs_impl.h"
85 #include "zfs_namecheck.h"
87 #define MIN_EDGECOUNT 4
90 * Vertex structure. Indexed by dataset name, this structure maintains a list
91 * of edges to other vertices.
/*
 * NOTE(review): this excerpt elides several members that the full struct
 * declares — zv_edgecount, zv_edgealloc, zv_txg, and zv_visited are all
 * referenced by the functions below — as well as the closing
 * "} zfs_vertex_t;" (the zfs_vertex_t name is used throughout). Confirm
 * against the complete source.
 */
94 typedef struct zfs_vertex {
95 char zv_dataset[ZFS_MAXNAMELEN];
96 struct zfs_vertex *zv_next;
99 struct zfs_edge **zv_edges;
111 * Edge structure. Simply maintains a pointer to the destination vertex. There
112 * is no need to store the source vertex, since we only use edges in the context
113 * of the source vertex.
/*
 * NOTE(review): the closing "} zfs_edge_t;" is elided in this excerpt
 * (the zfs_edge_t name is used by zfs_edge_create() below).
 */
115 typedef struct zfs_edge {
116 zfs_vertex_t *ze_dest;
117 struct zfs_edge *ze_next;
120 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
123 * Graph structure. Vertices are maintained in a hash indexed by dataset name.
/*
 * NOTE(review): excerpt elides additional members — zg_size and zg_nvertex
 * are both referenced later in this file — and the closing "} zfs_graph_t;".
 */
125 typedef struct zfs_graph {
126 zfs_vertex_t **zg_hash;
132 * Allocate a new edge pointing to the target vertex.
/*
 * NOTE(review): interior lines elided in this excerpt (return type, braces,
 * the NULL check on zfs_alloc(), the ze_dest assignment, and the return) —
 * confirm against the full source.
 */
135 zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
137 zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
/*
 * Free a single edge.
 * NOTE(review): the function body is elided in this excerpt.
 */
151 zfs_edge_destroy(zfs_edge_t *zep)
157 * Allocate a new vertex with the given name.
/*
 * Allocates a zfs_vertex_t, copies 'dataset' into zv_dataset, and allocates
 * an initial edge array of MIN_EDGECOUNT pointers (zv_edgealloc tracks the
 * capacity). NOTE(review): interior lines elided in this excerpt (braces,
 * NULL checks, cleanup on allocation failure, and the return) — confirm
 * against the full source.
 */
159 static zfs_vertex_t *
160 zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
162 zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
167 assert(strlen(dataset) < ZFS_MAXNAMELEN);
169 (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
171 if ((zvp->zv_edges = zfs_alloc(hdl,
172 MIN_EDGECOUNT * sizeof (void *))) == NULL) {
177 zvp->zv_edgealloc = MIN_EDGECOUNT;
183 * Destroy a vertex. Frees up any associated edges.
/*
 * Destroys each outgoing edge via zfs_edge_destroy(). NOTE(review): lines
 * elided in this excerpt (braces, the declaration of 'i', and presumably the
 * free of zv_edges and the vertex itself — confirm against the full source).
 */
186 zfs_vertex_destroy(zfs_vertex_t *zvp)
190 for (i = 0; i < zvp->zv_edgecount; i++)
191 zfs_edge_destroy(zvp->zv_edges[i]);
198 * Given a vertex, add an edge to the destination vertex.
/*
 * Appends a new edge to zvp's edge array, doubling the array (via
 * zfs_realloc(), old size -> 2x) when zv_edgecount reaches zv_edgealloc.
 * NOTE(review): interior lines elided in this excerpt (the 'dest' parameter
 * line, NULL checks, the zv_edges reassignment after realloc, and the
 * return) — confirm against the full source.
 */
201 zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
204 zfs_edge_t *zep = zfs_edge_create(hdl, dest);
209 if (zvp->zv_edgecount == zvp->zv_edgealloc) {
212 if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
213 zvp->zv_edgealloc * sizeof (void *),
214 zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
218 zvp->zv_edgealloc *= 2;
221 zvp->zv_edges[zvp->zv_edgecount++] = zep;
/*
 * qsort() comparator: orders edges by the creation txg (zv_txg) of their
 * destination vertices, ascending. NOTE(review): the return statements for
 * the less-than / greater-than / equal cases are elided in this excerpt.
 */
227 zfs_edge_compare(const void *a, const void *b)
229 const zfs_edge_t *ea = *((zfs_edge_t **)a);
230 const zfs_edge_t *eb = *((zfs_edge_t **)b);
232 if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
234 if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
240 * Sort the given vertex edges according to the creation txg of each vertex.
/*
 * Sorts zvp's edge array with qsort() using zfs_edge_compare; no-op for a
 * vertex with no edges. NOTE(review): the early return and the final
 * comparator argument to qsort() are elided in this excerpt.
 */
243 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
245 if (zvp->zv_edgecount == 0)
248 qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
253 * Construct a new graph object. We allow the size to be specified as a
254 * parameter so in the future we can size the hash according to the number of
255 * datasets in the pool.
/*
 * Allocates the graph and its hash table of 'size' vertex-pointer buckets.
 * NOTE(review): interior lines elided in this excerpt (NULL checks, cleanup
 * on failure, the zg_size assignment, and the return) — confirm against the
 * full source.
 */
258 zfs_graph_create(libzfs_handle_t *hdl, size_t size)
260 zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
266 if ((zgp->zg_hash = zfs_alloc(hdl,
267 size * sizeof (zfs_vertex_t *))) == NULL) {
276 * Destroy a graph object. We have to iterate over all the hash chains,
277 * destroying each vertex in the process.
/*
 * Walks every hash bucket and frees each chained vertex, saving zv_next
 * before destruction so the chain can be traversed safely. NOTE(review):
 * lines elided in this excerpt (the advance to 'next', loop closers, and
 * presumably the free of zg_hash and the graph itself — confirm against
 * the full source).
 */
280 zfs_graph_destroy(zfs_graph_t *zgp)
283 zfs_vertex_t *current, *next;
285 for (i = 0; i < zgp->zg_size; i++) {
286 current = zgp->zg_hash[i];
287 while (current != NULL) {
288 next = current->zv_next;
289 zfs_vertex_destroy(current);
299 * Graph hash function. Classic bernstein k=33 hash function, taken from
300 * usr/src/cmd/sgs/tools/common/strhash.c
/*
 * Hashes 'str' (hash = hash * 33 + c per character) and reduces the result
 * modulo the table size to a bucket index. NOTE(review): the declarations
 * and initialization of 'hash' and 'c' are elided in this excerpt.
 */
303 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
308 while ((c = *str++) != 0)
309 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
311 return (hash % zgp->zg_size);
315 * Given a dataset name, finds the associated vertex, creating it if necessary.
/*
 * Searches the hash chain for 'dataset'; on a miss, creates a new vertex and
 * pushes it onto the front of the bucket's chain. NOTE(review): interior
 * lines elided in this excerpt (the txg parameter line, the body of the
 * zv_txg == 0 branch — presumably recording the txg — the return of the
 * found vertex, and likely a zg_nvertex increment; confirm against the full
 * source).
 */
317 static zfs_vertex_t *
318 zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
321 size_t idx = zfs_graph_hash(zgp, dataset);
324 for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
325 if (strcmp(zvp->zv_dataset, dataset) == 0) {
326 if (zvp->zv_txg == 0)
332 if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
335 zvp->zv_next = zgp->zg_hash[idx];
337 zgp->zg_hash[idx] = zvp;
344 * Given two dataset names, create an edge between them. For the source vertex,
345 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
346 * created it as a destination of another edge. If 'dest' is NULL, then this
347 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
/*
 * Looks up (creating if needed) the source vertex, marks it VISIT_SEEN, then
 * looks up the destination vertex with the supplied creation txg and links
 * the two with an edge. NOTE(review): interior lines elided in this excerpt
 * (the dest != NULL guard implied by the comment above, NULL/error checks,
 * and the return) — confirm against the full source.
 */
350 zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
351 const char *dest, uint64_t txg)
353 zfs_vertex_t *svp, *dvp;
355 if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
357 svp->zv_visited = VISIT_SEEN;
359 dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
362 if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
370 * Iterate over all children of the given dataset, adding any vertices as
371 * necessary. Returns 0 if no cloned snapshots were seen, -1 if there was an
372 * error, or 1 otherwise. This is a simple recursive algorithm - the ZFS
373 * namespace typically is very flat. We manually invoke the necessary ioctl()
374 * calls to avoid the overhead and additional semantics of zfs_open().
/*
 * NOTE(review): this excerpt elides many interior lines (braces, declarations
 * of 'zvp' and 'err', continue/error returns inside the loops, loop closers,
 * and the final return). The visible flow: skip a vertex already VISIT_SEEN;
 * record the clone origin (dds_clone_of) for the root dataset; walk child
 * datasets via ZFS_IOC_DATASET_LIST_NEXT (skipping hidden names, adding a
 * parent->child edge keyed by dds_creation_txg, recursing); then walk
 * snapshots via ZFS_IOC_SNAPSHOT_LIST_NEXT the same way. A non-zero
 * dds_num_clones apparently feeds the "found a clone" result — confirm the
 * elided bookkeeping against the full source.
 */
377 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
379 zfs_cmd_t zc = { 0 };
384 * Look up the source vertex, and avoid it if we've seen it before.
386 zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
389 if (zvp->zv_visited == VISIT_SEEN)
393 * We check the clone parent here instead of within the loop, so that if
394 * the root dataset has been promoted from a clone, we find its parent
397 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
398 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
399 zc.zc_objset_stats.dds_clone_of[0] != '\0') {
400 if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
401 zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
405 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
406 ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
407 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
410 * Ignore private dataset names.
412 if (dataset_name_hidden(zc.zc_name))
416 * Get statistics for this dataset, to determine the type of the
417 * dataset and clone statistics. If this fails, the dataset has
418 * since been removed, and we're pretty much screwed anyway.
420 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
424 * Add an edge between the parent and the child.
426 if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
427 zc.zc_objset_stats.dds_creation_txg) != 0)
431 * Iterate over all children
433 err = iterate_children(hdl, zgp, zc.zc_name);
440 * Indicate if we found a dataset with a non-zero clone count.
442 if (zc.zc_objset_stats.dds_num_clones != 0)
447 * Now iterate over all snapshots.
449 bzero(&zc, sizeof (zc));
451 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
452 ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
453 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
456 * Get statistics for this dataset, to determine the type of the
457 * dataset and clone statistics. If this fails, the dataset has
458 * since been removed, and we're pretty much screwed anyway.
460 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
464 * Add an edge between the parent and the child.
466 if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
467 zc.zc_objset_stats.dds_creation_txg) != 0)
471 * Indicate if we found a dataset with a non-zero clone count.
473 if (zc.zc_objset_stats.dds_num_clones != 0)
477 zvp->zv_visited = VISIT_SEEN;
483 * Construct a complete graph of all necessary vertices. First, we iterate over
484 * only our object's children. If we don't find any cloned snapshots, then we
485 * simply return that. Otherwise, we have to start at the pool root and iterate
/*
 * Build the dependency graph for 'dataset'. First checks whether the dataset
 * itself has clones (dds_num_clones via ZFS_IOC_OBJSET_STATS) since
 * iterate_children() only examines children; if clones exist or the subtree
 * walk reports one, it derives the pool name (text before the first '/' or
 * '@') and re-walks from the pool root. Finally the starting dataset is
 * added as a lone vertex. NOTE(review): interior lines elided in this
 * excerpt (declarations of 'ret', 'slash', 'pool', NULL checks on
 * allocations, free(pool), error returns, and the final return of zgp) —
 * confirm against the full source. NOTE(review): the visible
 * strchr(dataset, '/') || strchr(dataset, '@') pair finds the FIRST
 * occurrence of '/' else '@' — verify this matches intended pool-name
 * parsing in the full source.
 */
489 construct_graph(libzfs_handle_t *hdl, const char *dataset)
491 zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
492 zfs_cmd_t zc = { 0 };
499 * We need to explicitly check whether this dataset has clones or not,
500 * since iterate_children() only checks the children.
502 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
503 (void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
505 if (zc.zc_objset_stats.dds_num_clones != 0 ||
506 (ret = iterate_children(hdl, zgp, dataset)) != 0) {
508 * Determine pool name and try again.
512 if ((slash = strchr(dataset, '/')) != NULL ||
513 (slash = strchr(dataset, '@')) != NULL) {
514 pool = zfs_alloc(hdl, slash - dataset + 1);
516 zfs_graph_destroy(zgp);
519 (void) strncpy(pool, dataset, slash - dataset);
520 pool[slash - dataset] = '\0';
522 if (iterate_children(hdl, zgp, pool) == -1 ||
523 zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
525 zfs_graph_destroy(zgp);
533 if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
534 zfs_graph_destroy(zgp);
542 * Given a graph, do a recursive topological sort into the given array. This is
543 * really just a depth first search, so that the deepest nodes appear first.
544 * We hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
/*
 * Depth-first post-order walk: a vertex re-encountered while still
 * VISIT_SORT_PRE indicates a cycle (EZFS_RECURSIVE unless 'allowrecursion');
 * a vertex already >= VISIT_SORT_PRE is skipped. Edges are sorted by
 * creation txg before recursing, the dataset name is copied into
 * result[*idx], and the vertex is marked VISIT_SORT_POST. NOTE(review):
 * interior lines elided in this excerpt (braces, declaration of 'i',
 * the returns for the skip/visited branches, error returns inside the
 * loop, the (*idx)++ increment implied by result[*idx] usage, and the
 * final return) — confirm against the full source.
 */
547 topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
548 size_t *idx, zfs_vertex_t *zgv)
552 if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
554 * If we've already seen this vertex as part of our depth-first
555 * search, then we have a cyclic dependency, and we must return
558 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
559 "recursive dependency at '%s'"),
561 return (zfs_error(hdl, EZFS_RECURSIVE,
562 dgettext(TEXT_DOMAIN,
563 "cannot determine dependent datasets")));
564 } else if (zgv->zv_visited >= VISIT_SORT_PRE) {
566 * If we've already processed this as part of the topological
567 * sort, then don't bother doing so again.
572 zgv->zv_visited = VISIT_SORT_PRE;
574 /* avoid doing a search if we don't have to */
575 zfs_vertex_sort_edges(zgv);
576 for (i = 0; i < zgv->zv_edgecount; i++) {
577 if (topo_sort(hdl, allowrecursion, result, idx,
578 zgv->zv_edges[i]->ze_dest) != 0)
582 /* we may have visited this in the course of the above */
583 if (zgv->zv_visited == VISIT_SORT_POST)
586 if ((result[*idx] = zfs_alloc(hdl,
587 strlen(zgv->zv_dataset) + 1)) == NULL)
590 (void) strcpy(result[*idx], zgv->zv_dataset);
592 zgv->zv_visited = VISIT_SORT_POST;
597 * The only public interface for this file. Do the dirty work of constructing a
598 * child list for the given object. Construct the graph, do the topological
599 * sort, and then return the array of strings to the caller.
601 * The 'allowrecursion' parameter controls behavior when cycles are found. If
602 * it is set, then the cycle is ignored and the results returned as if the cycle
603 * did not exist. If it is not set, then the routine will generate an error if
/*
 * Public entry point: builds the graph, allocates a result array sized by
 * zg_nvertex, finds the starting vertex, runs topo_sort() into *result /
 * *count, then frees the last entry (the starting vertex itself, which is
 * not strictly a dependent) and destroys the graph. NOTE(review): interior
 * lines elided in this excerpt (declarations of 'zgp' and 'zvp', the error
 * returns after each failure branch — which presumably also free *result —
 * the (*count)-- paired with the visible free(), and the final return) —
 * confirm cleanup behavior against the full source.
 */
607 get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
608 const char *dataset, char ***result, size_t *count)
613 if ((zgp = construct_graph(hdl, dataset)) == NULL)
616 if ((*result = zfs_alloc(hdl,
617 zgp->zg_nvertex * sizeof (char *))) == NULL) {
618 zfs_graph_destroy(zgp);
622 if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
624 zfs_graph_destroy(zgp);
629 if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
631 zfs_graph_destroy(zgp);
636 * Get rid of the last entry, which is our starting vertex and not
637 * strictly a dependent.
640 free((*result)[*count - 1]);
643 zfs_graph_destroy(zgp);