cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  * Copyright (c) 2014 Integros [integros.com]
  26  */
  27
  28 /*
  29  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
  30  * It has the following characteristics:
  31  *
  32  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
  33  *  threads.  This is accomplished primarily by avoiding global data
  34  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
  35  *  process to have multiple libzfs "instances".  Therefore, we store
  36  *  our few pieces of data (e.g. the file descriptor) in global
  37  *  variables.  The fd is reference-counted so that the libzfs_core
  38  *  library can be "initialized" multiple times (e.g. by different
  39  *  consumers within the same process).
  40  *
  41  *  - Committed Interface.  The libzfs_core interface will be committed,
  42  *  therefore consumers can compile against it and be confident that
  43  *  their code will continue to work on future releases of this code.
  44  *  Currently, the interface is Evolving (not Committed), but we intend
  45  *  to commit to it once it is more complete and we determine that it
  46  *  meets the needs of all consumers.
  47  *
  48  *  - Programmatic Error Handling.  libzfs_core communicates errors with
  49  *  defined error numbers, and doesn't print anything to stdout/stderr.
  50  *
  51  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
  52  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
  53  *  between libzfs_core functions and ioctls to /dev/zfs.
  54  *
  55  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
  56  *  with kernel ioctls, and kernel ioctls are generally atomic, each
  57  *  libzfs_core function is atomic.  For example, creating multiple
  58  *  snapshots with a single call to lzc_snapshot() is atomic -- it
  59  *  can't fail with only some of the requested snapshots created, even
  60  *  in the event of power loss or system crash.
  61  *
  62  *  - Continued libzfs Support.  Some higher-level operations (e.g.
  63  *  support for "zfs send -R") are too complicated to fit the scope of
  64  *  libzfs_core.  This functionality will continue to live in libzfs.
  65  *  Where appropriate, libzfs will use the underlying atomic operations
  66  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
  67  *  zfs receive" by using individual "send one snapshot", rename,
  68  *  destroy, and "receive one snapshot" operations in libzfs_core.
  69  *  /sbin/zfs and /sbin/zpool will link with both libzfs and
  70  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
  71  *  since that will be the supported, stable interface going forwards.
  72  */
  73
  74 #define _IN_LIBZFS_CORE_
  75
  76 #include <libzfs_core.h>
  77 #include <ctype.h>
  78 #include <unistd.h>
  79 #include <stdlib.h>
  80 #include <string.h>
  81 #include <errno.h>
  82 #include <fcntl.h>
  83 #include <pthread.h>
  84 #include <sys/nvpair.h>
  85 #include <sys/param.h>
  86 #include <sys/types.h>
  87 #include <sys/stat.h>
  88 #include <sys/zfs_ioctl.h>
  89 #include "libzfs_core_compat.h"
  90 #include "libzfs_compat.h"
  91
  92 #ifdef __FreeBSD__
  93 extern int zfs_ioctl_version;
  94 #endif
  95
/* Descriptor for /dev/zfs; opened by the first init, closed by the last fini. */
static int g_fd;
/* Serializes the updates to g_fd and g_refcount in init/fini. */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
/* Number of successful libzfs_core_init() calls not yet undone by fini. */
static int g_refcount;
  99
 100 int
 101 libzfs_core_init(void)
 102 {
 103         (void) pthread_mutex_lock(&g_lock);
 104         if (g_refcount == 0) {
 105                 g_fd = open("/dev/zfs", O_RDWR);
 106                 if (g_fd < 0) {
 107                         (void) pthread_mutex_unlock(&g_lock);
 108                         return (errno);
 109                 }
 110         }
 111         g_refcount++;
 112         (void) pthread_mutex_unlock(&g_lock);
 113
 114         return (0);
 115 }
 116
 117 void
 118 libzfs_core_fini(void)
 119 {
 120         (void) pthread_mutex_lock(&g_lock);
 121         ASSERT3S(g_refcount, >, 0);
 122         g_refcount--;
 123         if (g_refcount == 0)
 124                 (void) close(g_fd);
 125         (void) pthread_mutex_unlock(&g_lock);
 126 }
 127
/*
 * Issue an nvlist-based ioctl on the shared /dev/zfs descriptor.
 *
 *   ioc     - ioctl to issue
 *   name    - copied (truncating) into zc_name
 *   source  - input nvlist; packed and handed to the kernel via
 *             zc_nvlist_src / zc_nvlist_src_size
 *   resultp - if non-NULL, an output buffer is allocated and *resultp is
 *             set to NULL, then to the unpacked reply when the kernel
 *             reports zc_nvlist_dst_filled
 *
 * Returns 0 on success, the errno of the failed ioctl, or ENOMEM when the
 * output buffer cannot be allocated.  The packed input and the raw output
 * buffer are released here before returning.
 *
 * NOTE(review): on FreeBSD, when lzc_compat_pre() fails the function
 * returns before *resultp has been written, so a caller's result pointer
 * keeps whatever value it had on that path.
 */
static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
        zfs_cmd_t zc = { 0 };
        int error = 0;
        char *packed;
#ifdef __FreeBSD__
        nvlist_t *oldsource;
#endif
        size_t size;

        ASSERT3S(g_refcount, >, 0);

        (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));

#ifdef __FreeBSD__
        /* look up the ioctl interface version once and cache it */
        if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
                zfs_ioctl_version = get_zfs_ioctl_version();

        /*
         * Older ioctl interface: the compat hook receives pointers to ioc
         * and source and so may substitute either.  The caller's original
         * source is remembered so a substituted one can be freed at "out".
         */
        if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
                oldsource = source;
                error = lzc_compat_pre(&zc, &ioc, &source);
                if (error)
                        return (error);
        }
#endif

        packed = fnvlist_pack(source, &size);
        zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
        zc.zc_nvlist_src_size = size;

        if (resultp != NULL) {
                *resultp = NULL;
                /* first guess: twice the input size, but at least 128K */
                zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
                zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
                    malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
                if (zc.zc_nvlist_dst == NULL) {
#else
                if (zc.zc_nvlist_dst == 0) {
#endif
                        error = ENOMEM;
                        goto out;
                }
        }

        /*
         * ENOMEM with an output buffer present is treated as "buffer too
         * small": double it and retry.  Any other failure ends the loop.
         */
        while (ioctl(g_fd, ioc, &zc) != 0) {
                if (errno == ENOMEM && resultp != NULL) {
                        free((void *)(uintptr_t)zc.zc_nvlist_dst);
                        zc.zc_nvlist_dst_size *= 2;
                        zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
                            malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
                        if (zc.zc_nvlist_dst == NULL) {
#else
                        if (zc.zc_nvlist_dst == 0) {
#endif
                                error = ENOMEM;
                                goto out;
                        }
                } else {
                        error = errno;
                        break;
                }
        }

#ifdef __FreeBSD__
        if (zfs_ioctl_version < ZFS_IOCVER_LZC)
                lzc_compat_post(&zc, ioc);
#endif
        /* NOTE(review): dereferences resultp; assumes dst is never filled when resultp is NULL */
        if (zc.zc_nvlist_dst_filled) {
                *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
                    zc.zc_nvlist_dst_size);
        }
#ifdef __FreeBSD__
        if (zfs_ioctl_version < ZFS_IOCVER_LZC)
                lzc_compat_outnvl(&zc, ioc, resultp);
#endif
out:
#ifdef __FreeBSD__
        /* free a source substituted by the compat layer, never the caller's */
        if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
                if (source != oldsource)
                        nvlist_free(source);
                source = oldsource;
        }
#endif
        fnvlist_pack_free(packed, size);
        free((void *)(uintptr_t)zc.zc_nvlist_dst);
        return (error);
}
 219
 220 int
 221 lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
 222 {
 223         int error;
 224         nvlist_t *args = fnvlist_alloc();
 225         fnvlist_add_int32(args, "type", type);
 226         if (props != NULL)
 227                 fnvlist_add_nvlist(args, "props", props);
 228         error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
 229         nvlist_free(args);
 230         return (error);
 231 }
 232
 233 int
 234 lzc_clone(const char *fsname, const char *origin,
 235     nvlist_t *props)
 236 {
 237         int error;
 238         nvlist_t *args = fnvlist_alloc();
 239         fnvlist_add_string(args, "origin", origin);
 240         if (props != NULL)
 241                 fnvlist_add_nvlist(args, "props", props);
 242         error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
 243         nvlist_free(args);
 244         return (error);
 245 }
 246
 247 /*
 248  * Creates snapshots.
 249  *
 250  * The keys in the snaps nvlist are the snapshots to be created.
 251  * They must all be in the same pool.
 252  *
 253  * The props nvlist is properties to set.  Currently only user properties
 254  * are supported.  { user:prop_name -> string value }
 255  *
 256  * The returned results nvlist will have an entry for each snapshot that failed.
 257  * The value will be the (int32) error code.
 258  *
 259  * The return value will be 0 if all snapshots were created, otherwise it will
 260  * be the errno of a (unspecified) snapshot that failed.
 261  */
 262 int
 263 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
 264 {
 265         nvpair_t *elem;
 266         nvlist_t *args;
 267         int error;
 268         char pool[MAXNAMELEN];
 269
 270         *errlist = NULL;
 271
 272         /* determine the pool name */
 273         elem = nvlist_next_nvpair(snaps, NULL);
 274         if (elem == NULL)
 275                 return (0);
 276         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 277         pool[strcspn(pool, "/@")] = '\0';
 278
 279         args = fnvlist_alloc();
 280         fnvlist_add_nvlist(args, "snaps", snaps);
 281         if (props != NULL)
 282                 fnvlist_add_nvlist(args, "props", props);
 283
 284         error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
 285         nvlist_free(args);
 286
 287         return (error);
 288 }
 289
 290 /*
 291  * Destroys snapshots.
 292  *
 293  * The keys in the snaps nvlist are the snapshots to be destroyed.
 294  * They must all be in the same pool.
 295  *
 296  * Snapshots that do not exist will be silently ignored.
 297  *
 298  * If 'defer' is not set, and a snapshot has user holds or clones, the
 299  * destroy operation will fail and none of the snapshots will be
 300  * destroyed.
 301  *
 302  * If 'defer' is set, and a snapshot has user holds or clones, it will be
 303  * marked for deferred destruction, and will be destroyed when the last hold
 304  * or clone is removed/destroyed.
 305  *
 306  * The return value will be 0 if all snapshots were destroyed (or marked for
 307  * later destruction if 'defer' is set) or didn't exist to begin with.
 308  *
 309  * Otherwise the return value will be the errno of a (unspecified) snapshot
 310  * that failed, no snapshots will be destroyed, and the errlist will have an
 311  * entry for each snapshot that failed.  The value in the errlist will be
 312  * the (int32) error code.
 313  */
 314 int
 315 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
 316 {
 317         nvpair_t *elem;
 318         nvlist_t *args;
 319         int error;
 320         char pool[MAXNAMELEN];
 321
 322         /* determine the pool name */
 323         elem = nvlist_next_nvpair(snaps, NULL);
 324         if (elem == NULL)
 325                 return (0);
 326         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 327         pool[strcspn(pool, "/@")] = '\0';
 328
 329         args = fnvlist_alloc();
 330         fnvlist_add_nvlist(args, "snaps", snaps);
 331         if (defer)
 332                 fnvlist_add_boolean(args, "defer");
 333
 334         error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
 335         nvlist_free(args);
 336
 337         return (error);
 338 }
 339
 340 int
 341 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
 342     uint64_t *usedp)
 343 {
 344         nvlist_t *args;
 345         nvlist_t *result;
 346         int err;
 347         char fs[MAXNAMELEN];
 348         char *atp;
 349
 350         /* determine the fs name */
 351         (void) strlcpy(fs, firstsnap, sizeof (fs));
 352         atp = strchr(fs, '@');
 353         if (atp == NULL)
 354                 return (EINVAL);
 355         *atp = '\0';
 356
 357         args = fnvlist_alloc();
 358         fnvlist_add_string(args, "firstsnap", firstsnap);
 359
 360         err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
 361         nvlist_free(args);
 362         if (err == 0)
 363                 *usedp = fnvlist_lookup_uint64(result, "used");
 364         fnvlist_free(result);
 365
 366         return (err);
 367 }
 368
 369 boolean_t
 370 lzc_exists(const char *dataset)
 371 {
 372         /*
 373          * The objset_stats ioctl is still legacy, so we need to construct our
 374          * own zfs_cmd_t rather than using zfsc_ioctl().
 375          */
 376         zfs_cmd_t zc = { 0 };
 377
 378         (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 379         return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
 380 }
 381
 382 /*
 383  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
 384  * the snapshot can not be destroyed.  (However, it can be marked for deletion
 385  * by lzc_destroy_snaps(defer=B_TRUE).)
 386  *
 387  * The keys in the nvlist are snapshot names.
 388  * The snapshots must all be in the same pool.
 389  * The value is the name of the hold (string type).
 390  *
 391  * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 392  * In this case, when the cleanup_fd is closed (including on process
 393  * termination), the holds will be released.  If the system is shut down
 394  * uncleanly, the holds will be released when the pool is next opened
 395  * or imported.
 396  *
 397  * Holds for snapshots which don't exist will be skipped and have an entry
 398  * added to errlist, but will not cause an overall failure.
 399  *
 400  * The return value will be 0 if all holds, for snapshots that existed,
 401  * were succesfully created.
 402  *
 403  * Otherwise the return value will be the errno of a (unspecified) hold that
 404  * failed and no holds will be created.
 405  *
 406  * In all cases the errlist will have an entry for each hold that failed
 407  * (name = snapshot), with its value being the error code (int32).
 408  */
 409 int
 410 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
 411 {
 412         char pool[MAXNAMELEN];
 413         nvlist_t *args;
 414         nvpair_t *elem;
 415         int error;
 416
 417         /* determine the pool name */
 418         elem = nvlist_next_nvpair(holds, NULL);
 419         if (elem == NULL)
 420                 return (0);
 421         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 422         pool[strcspn(pool, "/@")] = '\0';
 423
 424         args = fnvlist_alloc();
 425         fnvlist_add_nvlist(args, "holds", holds);
 426         if (cleanup_fd != -1)
 427                 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
 428
 429         error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
 430         nvlist_free(args);
 431         return (error);
 432 }
 433
 434 /*
 435  * Release "user holds" on snapshots.  If the snapshot has been marked for
 436  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 437  * any clones, and all the user holds are removed, then the snapshot will be
 438  * destroyed.
 439  *
 440  * The keys in the nvlist are snapshot names.
 441  * The snapshots must all be in the same pool.
 442  * The value is a nvlist whose keys are the holds to remove.
 443  *
 444  * Holds which failed to release because they didn't exist will have an entry
 445  * added to errlist, but will not cause an overall failure.
 446  *
 447  * The return value will be 0 if the nvl holds was empty or all holds that
 448  * existed, were successfully removed.
 449  *
 450  * Otherwise the return value will be the errno of a (unspecified) hold that
 451  * failed to release and no holds will be released.
 452  *
 453  * In all cases the errlist will have an entry for each hold that failed to
 454  * to release.
 455  */
 456 int
 457 lzc_release(nvlist_t *holds, nvlist_t **errlist)
 458 {
 459         char pool[MAXNAMELEN];
 460         nvpair_t *elem;
 461
 462         /* determine the pool name */
 463         elem = nvlist_next_nvpair(holds, NULL);
 464         if (elem == NULL)
 465                 return (0);
 466         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 467         pool[strcspn(pool, "/@")] = '\0';
 468
 469         return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
 470 }
 471
 472 /*
 473  * Retrieve list of user holds on the specified snapshot.
 474  *
 475  * On success, *holdsp will be set to a nvlist which the caller must free.
 476  * The keys are the names of the holds, and the value is the creation time
 477  * of the hold (uint64) in seconds since the epoch.
 478  */
 479 int
 480 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
 481 {
 482         int error;
 483         nvlist_t *innvl = fnvlist_alloc();
 484         error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
 485         fnvlist_free(innvl);
 486         return (error);
 487 }
 488
 489 /*
 490  * Generate a zfs send stream for the specified snapshot and write it to
 491  * the specified file descriptor.
 492  *
 493  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 494  *
 495  * If "from" is NULL, a full (non-incremental) stream will be sent.
 496  * If "from" is non-NULL, it must be the full name of a snapshot or
 497  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 498  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 499  * bookmark must represent an earlier point in the history of "snapname").
 500  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 501  * or it can be the origin of "snapname"'s filesystem, or an earlier
 502  * snapshot in the origin, etc.
 503  *
 504  * "fd" is the file descriptor to write the send stream to.
 505  *
 506  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 507  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 508  * records with drr_blksz > 128K.
 509  *
 510  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 511  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 512  * which the receiving system must support (as indicated by support
 513  * for the "embedded_data" feature).
 514  */
 515 int
 516 lzc_send(const char *snapname, const char *from, int fd,
 517     enum lzc_send_flags flags)
 518 {
 519         return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
 520 }
 521
 522 int
 523 lzc_send_resume(const char *snapname, const char *from, int fd,
 524     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
 525 {
 526         nvlist_t *args;
 527         int err;
 528
 529         args = fnvlist_alloc();
 530         fnvlist_add_int32(args, "fd", fd);
 531         if (from != NULL)
 532                 fnvlist_add_string(args, "fromsnap", from);
 533         if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
 534                 fnvlist_add_boolean(args, "largeblockok");
 535         if (flags & LZC_SEND_FLAG_EMBED_DATA)
 536                 fnvlist_add_boolean(args, "embedok");
 537         if (resumeobj != 0 || resumeoff != 0) {
 538                 fnvlist_add_uint64(args, "resume_object", resumeobj);
 539                 fnvlist_add_uint64(args, "resume_offset", resumeoff);
 540         }
 541         err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
 542         nvlist_free(args);
 543         return (err);
 544 }
 545
 546 /*
 547  * "from" can be NULL, a snapshot, or a bookmark.
 548  *
 549  * If from is NULL, a full (non-incremental) stream will be estimated.  This
 550  * is calculated very efficiently.
 551  *
 552  * If from is a snapshot, lzc_send_space uses the deadlists attached to
 553  * each snapshot to efficiently estimate the stream size.
 554  *
 555  * If from is a bookmark, the indirect blocks in the destination snapshot
 556  * are traversed, looking for blocks with a birth time since the creation TXG of
 557  * the snapshot this bookmark was created from.  This will result in
 558  * significantly more I/O and be less efficient than a send space estimation on
 559  * an equivalent snapshot.
 560  */
 561 int
 562 lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
 563 {
 564         nvlist_t *args;
 565         nvlist_t *result;
 566         int err;
 567
 568         args = fnvlist_alloc();
 569         if (from != NULL)
 570                 fnvlist_add_string(args, "from", from);
 571         err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 572         nvlist_free(args);
 573         if (err == 0)
 574                 *spacep = fnvlist_lookup_uint64(result, "space");
 575         nvlist_free(result);
 576         return (err);
 577 }
 578
 579 static int
 580 recv_read(int fd, void *buf, int ilen)
 581 {
 582         char *cp = buf;
 583         int rv;
 584         int len = ilen;
 585
 586         do {
 587                 rv = read(fd, cp, len);
 588                 cp += rv;
 589                 len -= rv;
 590         } while (rv > 0);
 591
 592         if (rv < 0 || len != 0)
 593                 return (EIO);
 594
 595         return (0);
 596 }
 597
 598 static int
 599 lzc_receive_impl(const char *snapname, nvlist_t *props, const char *origin,
 600     boolean_t force, boolean_t resumable, int fd)
 601 {
 602         /*
 603          * The receive ioctl is still legacy, so we need to construct our own
 604          * zfs_cmd_t rather than using zfsc_ioctl().
 605          */
 606         zfs_cmd_t zc = { 0 };
 607         char *atp;
 608         char *packed = NULL;
 609         size_t size;
 610         int error;
 611
 612         ASSERT3S(g_refcount, >, 0);
 613
 614         /* zc_name is name of containing filesystem */
 615         (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
 616         atp = strchr(zc.zc_name, '@');
 617         if (atp == NULL)
 618                 return (EINVAL);
 619         *atp = '\0';
 620
 621         /* if the fs does not exist, try its parent. */
 622         if (!lzc_exists(zc.zc_name)) {
 623                 char *slashp = strrchr(zc.zc_name, '/');
 624                 if (slashp == NULL)
 625                         return (ENOENT);
 626                 *slashp = '\0';
 627
 628         }
 629
 630         /* zc_value is full name of the snapshot to create */
 631         (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 632
 633         if (props != NULL) {
 634                 /* zc_nvlist_src is props to set */
 635                 packed = fnvlist_pack(props, &size);
 636                 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
 637                 zc.zc_nvlist_src_size = size;
 638         }
 639
 640         /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
 641         if (origin != NULL)
 642                 (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
 643
 644         /* zc_begin_record is non-byteswapped BEGIN record */
 645         error = recv_read(fd, &zc.zc_begin_record, sizeof (zc.zc_begin_record));
 646         if (error != 0)
 647                 goto out;
 648
 649         /* zc_cookie is fd to read from */
 650         zc.zc_cookie = fd;
 651
 652         /* zc guid is force flag */
 653         zc.zc_guid = force;
 654
 655         zc.zc_resumable = resumable;
 656
 657         /* zc_cleanup_fd is unused */
 658         zc.zc_cleanup_fd = -1;
 659
 660         error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
 661         if (error != 0)
 662                 error = errno;
 663
 664 out:
 665         if (packed != NULL)
 666                 fnvlist_pack_free(packed, size);
 667         free((void*)(uintptr_t)zc.zc_nvlist_dst);
 668         return (error);
 669 }
 670
 671 /*
 672  * The simplest receive case: receive from the specified fd, creating the
 673  * specified snapshot.  Apply the specified properties as "received" properties
 674  * (which can be overridden by locally-set properties).  If the stream is a
 675  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
 676  * flag will cause the target filesystem to be rolled back or destroyed if
 677  * necessary to receive.
 678  *
 679  * Return 0 on success or an errno on failure.
 680  *
 681  * Note: this interface does not work on dedup'd streams
 682  * (those with DMU_BACKUP_FEATURE_DEDUP).
 683  */
 684 int
 685 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
 686     boolean_t force, int fd)
 687 {
 688         return (lzc_receive_impl(snapname, props, origin, force, B_FALSE, fd));
 689 }
 690
 691 /*
 692  * Like lzc_receive, but if the receive fails due to premature stream
 693  * termination, the intermediate state will be preserved on disk.  In this
 694  * case, ECKSUM will be returned.  The receive may subsequently be resumed
 695  * with a resuming send stream generated by lzc_send_resume().
 696  */
 697 int
 698 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
 699     boolean_t force, int fd)
 700 {
 701         return (lzc_receive_impl(snapname, props, origin, force, B_TRUE, fd));
 702 }
 703
 704 /*
 705  * Roll back this filesystem or volume to its most recent snapshot.
 706  * If snapnamebuf is not NULL, it will be filled in with the name
 707  * of the most recent snapshot.
 708  *
 709  * Return 0 on success or an errno on failure.
 710  */
 711 int
 712 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
 713 {
 714         nvlist_t *args;
 715         nvlist_t *result;
 716         int err;
 717
 718         args = fnvlist_alloc();
 719         err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
 720         nvlist_free(args);
 721         if (err == 0 && snapnamebuf != NULL) {
 722                 const char *snapname = fnvlist_lookup_string(result, "target");
 723                 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
 724         }
 725         return (err);
 726 }
 727
 728 /*
 729  * Creates bookmarks.
 730  *
 731  * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
 732  * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
 733  * snapshots must be in the same pool.
 734  *
 735  * The returned results nvlist will have an entry for each bookmark that failed.
 736  * The value will be the (int32) error code.
 737  *
 738  * The return value will be 0 if all bookmarks were created, otherwise it will
 739  * be the errno of a (undetermined) bookmarks that failed.
 740  */
 741 int
 742 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
 743 {
 744         nvpair_t *elem;
 745         int error;
 746         char pool[MAXNAMELEN];
 747
 748         /* determine the pool name */
 749         elem = nvlist_next_nvpair(bookmarks, NULL);
 750         if (elem == NULL)
 751                 return (0);
 752         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 753         pool[strcspn(pool, "/#")] = '\0';
 754
 755         error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
 756
 757         return (error);
 758 }
 759
 760 /*
 761  * Retrieve bookmarks.
 762  *
 763  * Retrieve the list of bookmarks for the given file system. The props
 764  * parameter is an nvlist of property names (with no values) that will be
 765  * returned for each bookmark.
 766  *
 767  * The following are valid properties on bookmarks, all of which are numbers
 768  * (represented as uint64 in the nvlist)
 769  *
 770  * "guid" - globally unique identifier of the snapshot it refers to
 771  * "createtxg" - txg when the snapshot it refers to was created
 772  * "creation" - timestamp when the snapshot it refers to was created
 773  *
 774  * The format of the returned nvlist as follows:
 775  * <short name of bookmark> -> {
 776  *     <name of property> -> {
 777  *         "value" -> uint64
 778  *     }
 779  *  }
 780  */
 781 int
 782 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
 783 {
 784         return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
 785 }
 786
 787 /*
 788  * Destroys bookmarks.
 789  *
 790  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 791  * They must all be in the same pool.  Bookmarks are specified as
 792  * <fs>#<bmark>.
 793  *
 794  * Bookmarks that do not exist will be silently ignored.
 795  *
 796  * The return value will be 0 if all bookmarks that existed were destroyed.
 797  *
 798  * Otherwise the return value will be the errno of a (undetermined) bookmark
 799  * that failed, no bookmarks will be destroyed, and the errlist will have an
 800  * entry for each bookmarks that failed.  The value in the errlist will be
 801  * the (int32) error code.
 802  */
 803 int
 804 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
 805 {
 806         nvpair_t *elem;
 807         int error;
 808         char pool[MAXNAMELEN];
 809
 810         /* determine the pool name */
 811         elem = nvlist_next_nvpair(bmarks, NULL);
 812         if (elem == NULL)
 813                 return (0);
 814         (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 815         pool[strcspn(pool, "/#")] = '\0';
 816
 817         error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
 818
 819         return (error);
 820 }