lib/libzfs/libzfs_mount.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2014, 2019 by Delphix. All rights reserved.
  26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  27  * Copyright 2017 RackTop Systems.
  28  * Copyright (c) 2018 Datto Inc.
  29  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
  30  */
  31
  32 /*
  33  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
  34  * to deal with the OS.  The following functions are the main entry points --
  35  * they are used by mount and unmount and when changing a filesystem's
  36  * mountpoint.
  37  *
  38  *      zfs_is_mounted()
  39  *      zfs_mount()
  40  *      zfs_unmount()
  41  *      zfs_unmountall()
  42  *
  43  * This file also contains the functions used to manage sharing filesystems via
  44  * NFS and SMB:
  45  *
  46  *      zfs_is_shared()
  47  *      zfs_share()
  48  *      zfs_unshare()
  49  *
  50  *      zfs_is_shared_nfs()
  51  *      zfs_is_shared_smb()
  52  *      zfs_share_proto()
  53  *      zfs_shareall()
  54  *      zfs_unshare_nfs()
  55  *      zfs_unshare_smb()
  56  *      zfs_unshareall_nfs()
  57  *      zfs_unshareall_smb()
  58  *      zfs_unshareall()
  59  *      zfs_unshareall_bypath()
  60  *
  61  * The following functions are available for pool consumers, and will
  62  * mount/unmount and share/unshare all datasets within pool:
  63  *
  64  *      zpool_enable_datasets()
  65  *      zpool_disable_datasets()
  66  */
  67
  68 #include <dirent.h>
  69 #include <dlfcn.h>
  70 #include <errno.h>
  71 #include <fcntl.h>
  72 #include <libgen.h>
  73 #include <libintl.h>
  74 #include <stdio.h>
  75 #include <stdlib.h>
  76 #include <strings.h>
  77 #include <unistd.h>
  78 #include <zone.h>
  79 #include <sys/mntent.h>
  80 #include <sys/mount.h>
  81 #include <sys/stat.h>
  82 #include <sys/vfs.h>
  83 #include <sys/dsl_crypt.h>
  84
  85 #include <libzfs.h>
  86
  87 #include "libzfs_impl.h"
  88 #include <thread_pool.h>
  89
  90 #include <libshare.h>
  91 #include <sys/systeminfo.h>
  92 #define MAXISALEN       257     /* based on sysinfo(2) man page */
  93
  94 static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */
  95
  96 static void zfs_mount_task(void *);
  97 static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
  98 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
  99     zfs_share_proto_t);
 100
 101 /*
 102  * The share protocols table must be in the same order as the zfs_share_proto_t
 103  * enum in libzfs_impl.h
 104  */
 105 typedef struct {
 106         zfs_prop_t p_prop;
 107         char *p_name;
 108         int p_share_err;
 109         int p_unshare_err;
 110 } proto_table_t;
 111
 112 proto_table_t proto_table[PROTO_END] = {
 113         {ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
 114         {ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
 115 };
 116
 117 zfs_share_proto_t nfs_only[] = {
 118         PROTO_NFS,
 119         PROTO_END
 120 };
 121
 122 zfs_share_proto_t smb_only[] = {
 123         PROTO_SMB,
 124         PROTO_END
 125 };
 126 zfs_share_proto_t share_all_proto[] = {
 127         PROTO_NFS,
 128         PROTO_SMB,
 129         PROTO_END
 130 };
 131
 132 /*
 133  * Search the sharetab for the given mountpoint and protocol, returning
 134  * a zfs_share_type_t value.
 135  */
 136 static zfs_share_type_t
 137 is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
 138 {
 139         char buf[MAXPATHLEN], *tab;
 140         char *ptr;
 141
 142         if (hdl->libzfs_sharetab == NULL)
 143                 return (SHARED_NOT_SHARED);
 144
 145         /* Reopen ZFS_SHARETAB to prevent reading stale data from open file */
 146         if (freopen(ZFS_SHARETAB, "r", hdl->libzfs_sharetab) == NULL)
 147                 return (SHARED_NOT_SHARED);
 148
 149         (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
 150
 151         while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
 152
 153                 /* the mountpoint is the first entry on each line */
 154                 if ((tab = strchr(buf, '\t')) == NULL)
 155                         continue;
 156
 157                 *tab = '\0';
 158                 if (strcmp(buf, mountpoint) == 0) {
 159                         /*
 160                          * the protocol field is the third field
 161                          * skip over second field
 162                          */
 163                         ptr = ++tab;
 164                         if ((tab = strchr(ptr, '\t')) == NULL)
 165                                 continue;
 166                         ptr = ++tab;
 167                         if ((tab = strchr(ptr, '\t')) == NULL)
 168                                 continue;
 169                         *tab = '\0';
 170                         if (strcmp(ptr,
 171                             proto_table[proto].p_name) == 0) {
 172                                 switch (proto) {
 173                                 case PROTO_NFS:
 174                                         return (SHARED_NFS);
 175                                 case PROTO_SMB:
 176                                         return (SHARED_SMB);
 177                                 default:
 178                                         return (0);
 179                                 }
 180                         }
 181                 }
 182         }
 183
 184         return (SHARED_NOT_SHARED);
 185 }
 186
 187 static boolean_t
 188 dir_is_empty_stat(const char *dirname)
 189 {
 190         struct stat st;
 191
 192         /*
 193          * We only want to return false if the given path is a non empty
 194          * directory, all other errors are handled elsewhere.
 195          */
 196         if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
 197                 return (B_TRUE);
 198         }
 199
 200         /*
 201          * An empty directory will still have two entries in it, one
 202          * entry for each of "." and "..".
 203          */
 204         if (st.st_size > 2) {
 205                 return (B_FALSE);
 206         }
 207
 208         return (B_TRUE);
 209 }
 210
 211 static boolean_t
 212 dir_is_empty_readdir(const char *dirname)
 213 {
 214         DIR *dirp;
 215         struct dirent64 *dp;
 216         int dirfd;
 217
 218         if ((dirfd = openat(AT_FDCWD, dirname,
 219             O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
 220                 return (B_TRUE);
 221         }
 222
 223         if ((dirp = fdopendir(dirfd)) == NULL) {
 224                 (void) close(dirfd);
 225                 return (B_TRUE);
 226         }
 227
 228         while ((dp = readdir64(dirp)) != NULL) {
 229
 230                 if (strcmp(dp->d_name, ".") == 0 ||
 231                     strcmp(dp->d_name, "..") == 0)
 232                         continue;
 233
 234                 (void) closedir(dirp);
 235                 return (B_FALSE);
 236         }
 237
 238         (void) closedir(dirp);
 239         return (B_TRUE);
 240 }
 241
 242 /*
 243  * Returns true if the specified directory is empty.  If we can't open the
 244  * directory at all, return true so that the mount can fail with a more
 245  * informative error message.
 246  */
 247 static boolean_t
 248 dir_is_empty(const char *dirname)
 249 {
 250         struct statfs64 st;
 251
 252         /*
 253          * If the statvfs call fails or the filesystem is not a ZFS
 254          * filesystem, fall back to the slow path which uses readdir.
 255          */
 256         if ((statfs64(dirname, &st) != 0) ||
 257             (st.f_type != ZFS_SUPER_MAGIC)) {
 258                 return (dir_is_empty_readdir(dirname));
 259         }
 260
 261         /*
 262          * At this point, we know the provided path is on a ZFS
 263          * filesystem, so we can use stat instead of readdir to
 264          * determine if the directory is empty or not. We try to avoid
 265          * using readdir because that requires opening "dirname"; this
 266          * open file descriptor can potentially end up in a child
 267          * process if there's a concurrent fork, thus preventing the
 268          * zfs_mount() from otherwise succeeding (the open file
 269          * descriptor inherited by the child process will cause the
 270          * parent's mount to fail with EBUSY). The performance
 271          * implications of replacing the open, read, and close with a
 272          * single stat is nice; but is not the main motivation for the
 273          * added complexity.
 274          */
 275         return (dir_is_empty_stat(dirname));
 276 }
 277
 278 /*
 279  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
 280  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
 281  * 0.
 282  */
 283 boolean_t
 284 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
 285 {
 286         struct mnttab entry;
 287
 288         if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
 289                 return (B_FALSE);
 290
 291         if (where != NULL)
 292                 *where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
 293
 294         return (B_TRUE);
 295 }
 296
 297 boolean_t
 298 zfs_is_mounted(zfs_handle_t *zhp, char **where)
 299 {
 300         return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
 301 }
 302
 303 /*
 304  * Returns true if the given dataset is mountable, false otherwise.  Returns the
 305  * mountpoint in 'buf'.
 306  */
 307 static boolean_t
 308 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
 309     zprop_source_t *source, int flags)
 310 {
 311         char sourceloc[MAXNAMELEN];
 312         zprop_source_t sourcetype;
 313
 314         if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,
 315             B_FALSE))
 316                 return (B_FALSE);
 317
 318         verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
 319             &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
 320
 321         if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
 322             strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
 323                 return (B_FALSE);
 324
 325         if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
 326                 return (B_FALSE);
 327
 328         if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
 329             getzoneid() == GLOBAL_ZONEID)
 330                 return (B_FALSE);
 331
 332         if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
 333             getzoneid() == GLOBAL_ZONEID)
 334                 return (B_FALSE);
 335
 336         if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
 337                 return (B_FALSE);
 338
 339         if (source)
 340                 *source = sourcetype;
 341
 342         return (B_TRUE);
 343 }
 344
 345 /*
 346  * The filesystem is mounted by invoking the system mount utility rather
 347  * than by the system call mount(2).  This ensures that the /etc/mtab
 348  * file is correctly locked for the update.  Performing our own locking
 349  * and /etc/mtab update requires making an unsafe assumption about how
 350  * the mount utility performs its locking.  Unfortunately, this also means
 351  * in the case of a mount failure we do not have the exact errno.  We must
 352  * make due with return value from the mount process.
 353  *
 354  * In the long term a shared library called libmount is under development
 355  * which provides a common API to address the locking and errno issues.
 356  * Once the standard mount utility has been updated to use this library
 357  * we can add an autoconf check to conditionally use it.
 358  *
 359  * http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html
 360  */
 361
 362 static int
 363 do_mount(const char *src, const char *mntpt, char *opts)
 364 {
 365         char *argv[9] = {
 366             "/bin/mount",
 367             "--no-canonicalize",
 368             "-t", MNTTYPE_ZFS,
 369             "-o", opts,
 370             (char *)src,
 371             (char *)mntpt,
 372             (char *)NULL };
 373         int rc;
 374
 375         /* Return only the most critical mount error */
 376         rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE);
 377         if (rc) {
 378                 if (rc & MOUNT_FILEIO)
 379                         return (EIO);
 380                 if (rc & MOUNT_USER)
 381                         return (EINTR);
 382                 if (rc & MOUNT_SOFTWARE)
 383                         return (EPIPE);
 384                 if (rc & MOUNT_BUSY)
 385                         return (EBUSY);
 386                 if (rc & MOUNT_SYSERR)
 387                         return (EAGAIN);
 388                 if (rc & MOUNT_USAGE)
 389                         return (EINVAL);
 390
 391                 return (ENXIO); /* Generic error */
 392         }
 393
 394         return (0);
 395 }
 396
 397 static int
 398 do_unmount(const char *mntpt, int flags)
 399 {
 400         char force_opt[] = "-f";
 401         char lazy_opt[] = "-l";
 402         char *argv[7] = {
 403             "/bin/umount",
 404             "-t", MNTTYPE_ZFS,
 405             NULL, NULL, NULL, NULL };
 406         int rc, count = 3;
 407
 408         if (flags & MS_FORCE) {
 409                 argv[count] = force_opt;
 410                 count++;
 411         }
 412
 413         if (flags & MS_DETACH) {
 414                 argv[count] = lazy_opt;
 415                 count++;
 416         }
 417
 418         argv[count] = (char *)mntpt;
 419         rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE);
 420
 421         return (rc ? EINVAL : 0);
 422 }
 423
 424 static int
 425 zfs_add_option(zfs_handle_t *zhp, char *options, int len,
 426     zfs_prop_t prop, char *on, char *off)
 427 {
 428         char *source;
 429         uint64_t value;
 430
 431         /* Skip adding duplicate default options */
 432         if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))
 433                 return (0);
 434
 435         /*
 436          * zfs_prop_get_int() is not used to ensure our mount options
 437          * are not influenced by the current /proc/self/mounts contents.
 438          */
 439         value = getprop_uint64(zhp, prop, &source);
 440
 441         (void) strlcat(options, ",", len);
 442         (void) strlcat(options, value ? on : off, len);
 443
 444         return (0);
 445 }
 446
 447 static int
 448 zfs_add_options(zfs_handle_t *zhp, char *options, int len)
 449 {
 450         int error = 0;
 451
 452         error = zfs_add_option(zhp, options, len,
 453             ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);
 454         /*
 455          * don't add relatime/strictatime when atime=off, otherwise strictatime
 456          * will force atime=on
 457          */
 458         if (strstr(options, MNTOPT_NOATIME) == NULL) {
 459                 error = zfs_add_option(zhp, options, len,
 460                     ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);
 461         }
 462         error = error ? error : zfs_add_option(zhp, options, len,
 463             ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);
 464         error = error ? error : zfs_add_option(zhp, options, len,
 465             ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);
 466         error = error ? error : zfs_add_option(zhp, options, len,
 467             ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);
 468         error = error ? error : zfs_add_option(zhp, options, len,
 469             ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);
 470         error = error ? error : zfs_add_option(zhp, options, len,
 471             ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);
 472
 473         return (error);
 474 }
 475
 476 /*
 477  * Mount the given filesystem.
 478  */
 479 int
 480 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 481 {
 482         struct stat buf;
 483         char mountpoint[ZFS_MAXPROPLEN];
 484         char mntopts[MNT_LINE_MAX];
 485         char overlay[ZFS_MAXPROPLEN];
 486         libzfs_handle_t *hdl = zhp->zfs_hdl;
 487         uint64_t keystatus;
 488         int remount = 0, rc;
 489
 490         if (options == NULL) {
 491                 (void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));
 492         } else {
 493                 (void) strlcpy(mntopts, options, sizeof (mntopts));
 494         }
 495
 496         if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)
 497                 remount = 1;
 498
 499         /*
 500          * If the pool is imported read-only then all mounts must be read-only
 501          */
 502         if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
 503                 (void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));
 504
 505         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,
 506             flags)) {
 507                 return (0);
 508         }
 509
 510         /*
 511          * Append default mount options which apply to the mount point.
 512          * This is done because under Linux (unlike Solaris) multiple mount
 513          * points may reference a single super block.  This means that just
 514          * given a super block there is no back reference to update the per
 515          * mount point options.
 516          */
 517         rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));
 518         if (rc) {
 519                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 520                     "default options unavailable"));
 521                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 522                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 523                     mountpoint));
 524         }
 525
 526         /*
 527          * If the filesystem is encrypted the key must be loaded  in order to
 528          * mount. If the key isn't loaded, the MS_CRYPT flag decides whether
 529          * or not we attempt to load the keys. Note: we must call
 530          * zfs_refresh_properties() here since some callers of this function
 531          * (most notably zpool_enable_datasets()) may implicitly load our key
 532          * by loading the parent's key first.
 533          */
 534         if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
 535                 zfs_refresh_properties(zhp);
 536                 keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
 537
 538                 /*
 539                  * If the key is unavailable and MS_CRYPT is set give the
 540                  * user a chance to enter the key. Otherwise just fail
 541                  * immediately.
 542                  */
 543                 if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
 544                         if (flags & MS_CRYPT) {
 545                                 rc = zfs_crypto_load_key(zhp, B_FALSE, NULL);
 546                                 if (rc)
 547                                         return (rc);
 548                         } else {
 549                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 550                                     "encryption key not loaded"));
 551                                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 552                                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 553                                     mountpoint));
 554                         }
 555                 }
 556
 557         }
 558
 559         /*
 560          * Append zfsutil option so the mount helper allow the mount
 561          */
 562         strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));
 563
 564         /* Create the directory if it doesn't already exist */
 565         if (lstat(mountpoint, &buf) != 0) {
 566                 if (mkdirp(mountpoint, 0755) != 0) {
 567                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 568                             "failed to create mountpoint"));
 569                         return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 570                             dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 571                             mountpoint));
 572                 }
 573         }
 574
 575         /*
 576          * Overlay mounts are disabled by default but may be enabled
 577          * via the 'overlay' property or the 'zfs mount -O' option.
 578          */
 579         if (!(flags & MS_OVERLAY)) {
 580                 if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
 581                     sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
 582                         if (strcmp(overlay, "on") == 0) {
 583                                 flags |= MS_OVERLAY;
 584                         }
 585                 }
 586         }
 587
 588         /*
 589          * Determine if the mountpoint is empty.  If so, refuse to perform the
 590          * mount.  We don't perform this check if 'remount' is
 591          * specified or if overlay option(-O) is given
 592          */
 593         if ((flags & MS_OVERLAY) == 0 && !remount &&
 594             !dir_is_empty(mountpoint)) {
 595                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 596                     "directory is not empty"));
 597                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 598                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
 599         }
 600
 601         /* perform the mount */
 602         rc = do_mount(zfs_get_name(zhp), mountpoint, mntopts);
 603         if (rc) {
 604                 /*
 605                  * Generic errors are nasty, but there are just way too many
 606                  * from mount(), and they're well-understood.  We pick a few
 607                  * common ones to improve upon.
 608                  */
 609                 if (rc == EBUSY) {
 610                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 611                             "mountpoint or dataset is busy"));
 612                 } else if (rc == EPERM) {
 613                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 614                             "Insufficient privileges"));
 615                 } else if (rc == ENOTSUP) {
 616                         char buf[256];
 617                         int spa_version;
 618
 619                         VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
 620                         (void) snprintf(buf, sizeof (buf),
 621                             dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
 622                             "file system on a version %d pool. Pool must be"
 623                             " upgraded to mount this file system."),
 624                             (u_longlong_t)zfs_prop_get_int(zhp,
 625                             ZFS_PROP_VERSION), spa_version);
 626                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
 627                 } else {
 628                         zfs_error_aux(hdl, strerror(rc));
 629                 }
 630                 return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
 631                     dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
 632                     zhp->zfs_name));
 633         }
 634
 635         /* remove the mounted entry before re-adding on remount */
 636         if (remount)
 637                 libzfs_mnttab_remove(hdl, zhp->zfs_name);
 638
 639         /* add the mounted entry into our cache */
 640         libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);
 641         return (0);
 642 }
 643
 644 /*
 645  * Unmount a single filesystem.
 646  */
 647 static int
 648 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
 649 {
 650         int error;
 651
 652         error = do_unmount(mountpoint, flags);
 653         if (error != 0) {
 654                 return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
 655                     dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
 656                     mountpoint));
 657         }
 658
 659         return (0);
 660 }
 661
 662 /*
 663  * Unmount the given filesystem.
 664  */
 665 int
 666 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
 667 {
 668         libzfs_handle_t *hdl = zhp->zfs_hdl;
 669         struct mnttab entry;
 670         char *mntpt = NULL;
 671         boolean_t encroot, unmounted = B_FALSE;
 672
 673         /* check to see if we need to unmount the filesystem */
 674         if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
 675             libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
 676                 /*
 677                  * mountpoint may have come from a call to
 678                  * getmnt/getmntany if it isn't NULL. If it is NULL,
 679                  * we know it comes from libzfs_mnttab_find which can
 680                  * then get freed later. We strdup it to play it safe.
 681                  */
 682                 if (mountpoint == NULL)
 683                         mntpt = zfs_strdup(hdl, entry.mnt_mountp);
 684                 else
 685                         mntpt = zfs_strdup(hdl, mountpoint);
 686
 687                 /*
 688                  * Unshare and unmount the filesystem
 689                  */
 690                 if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) {
 691                         free(mntpt);
 692                         return (-1);
 693                 }
 694
 695                 if (unmount_one(hdl, mntpt, flags) != 0) {
 696                         free(mntpt);
 697                         (void) zfs_shareall(zhp);
 698                         return (-1);
 699                 }
 700
 701                 libzfs_mnttab_remove(hdl, zhp->zfs_name);
 702                 free(mntpt);
 703                 unmounted = B_TRUE;
 704         }
 705
 706         /*
 707          * If the MS_CRYPT flag is provided we must ensure we attempt to
 708          * unload the dataset's key regardless of whether we did any work
 709          * to unmount it. We only do this for encryption roots.
 710          */
 711         if ((flags & MS_CRYPT) != 0 &&
 712             zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
 713                 zfs_refresh_properties(zhp);
 714
 715                 if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&
 716                     unmounted) {
 717                         (void) zfs_mount(zhp, NULL, 0);
 718                         return (-1);
 719                 }
 720
 721                 if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
 722                     ZFS_KEYSTATUS_AVAILABLE &&
 723                     zfs_crypto_unload_key(zhp) != 0) {
 724                         (void) zfs_mount(zhp, NULL, 0);
 725                         return (-1);
 726                 }
 727         }
 728
 729         return (0);
 730 }
 731
 732 /*
 733  * Unmount this filesystem and any children inheriting the mountpoint property.
 734  * To do this, just act like we're changing the mountpoint property, but don't
 735  * remount the filesystems afterwards.
 736  */
 737 int
 738 zfs_unmountall(zfs_handle_t *zhp, int flags)
 739 {
 740         prop_changelist_t *clp;
 741         int ret;
 742
 743         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
 744             CL_GATHER_ITER_MOUNTED, flags);
 745         if (clp == NULL)
 746                 return (-1);
 747
 748         ret = changelist_prefix(clp);
 749         changelist_free(clp);
 750
 751         return (ret);
 752 }
 753
 754 boolean_t
 755 zfs_is_shared(zfs_handle_t *zhp)
 756 {
 757         zfs_share_type_t rc = 0;
 758         zfs_share_proto_t *curr_proto;
 759
 760         if (ZFS_IS_VOLUME(zhp))
 761                 return (B_FALSE);
 762
 763         for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
 764             curr_proto++)
 765                 rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto);
 766
 767         return (rc ? B_TRUE : B_FALSE);
 768 }
 769
 770 int
 771 zfs_share(zfs_handle_t *zhp)
 772 {
 773         assert(!ZFS_IS_VOLUME(zhp));
 774         return (zfs_share_proto(zhp, share_all_proto));
 775 }
 776
 777 int
 778 zfs_unshare(zfs_handle_t *zhp)
 779 {
 780         assert(!ZFS_IS_VOLUME(zhp));
 781         return (zfs_unshareall(zhp));
 782 }
 783
 784 /*
 785  * Check to see if the filesystem is currently shared.
 786  */
 787 zfs_share_type_t
 788 zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
 789 {
 790         char *mountpoint;
 791         zfs_share_type_t rc;
 792
 793         if (!zfs_is_mounted(zhp, &mountpoint))
 794                 return (SHARED_NOT_SHARED);
 795
 796         if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
 797             != SHARED_NOT_SHARED) {
 798                 if (where != NULL)
 799                         *where = mountpoint;
 800                 else
 801                         free(mountpoint);
 802                 return (rc);
 803         } else {
 804                 free(mountpoint);
 805                 return (SHARED_NOT_SHARED);
 806         }
 807 }
 808
 809 boolean_t
 810 zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
 811 {
 812         return (zfs_is_shared_proto(zhp, where,
 813             PROTO_NFS) != SHARED_NOT_SHARED);
 814 }
 815
 816 boolean_t
 817 zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
 818 {
 819         return (zfs_is_shared_proto(zhp, where,
 820             PROTO_SMB) != SHARED_NOT_SHARED);
 821 }
 822
 823 /*
 824  * zfs_init_libshare(zhandle, service)
 825  *
 826  * Initialize the libshare API if it hasn't already been initialized.
 827  * In all cases it returns 0 if it succeeded and an error if not. The
 828  * service value is which part(s) of the API to initialize and is a
 829  * direct map to the libshare sa_init(service) interface.
 830  */
 831 int
 832 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
 833 {
 834         int ret = SA_OK;
 835
 836         if (ret == SA_OK && zhandle->libzfs_shareflags & ZFSSHARE_MISS) {
 837                 /*
 838                  * We had a cache miss. Most likely it is a new ZFS
 839                  * dataset that was just created. We want to make sure
 840                  * so check timestamps to see if a different process
 841                  * has updated any of the configuration. If there was
 842                  * some non-ZFS change, we need to re-initialize the
 843                  * internal cache.
 844                  */
 845                 zhandle->libzfs_shareflags &= ~ZFSSHARE_MISS;
 846                 if (sa_needs_refresh(zhandle->libzfs_sharehdl)) {
 847                         zfs_uninit_libshare(zhandle);
 848                         zhandle->libzfs_sharehdl = sa_init(service);
 849                 }
 850         }
 851
 852         if (ret == SA_OK && zhandle && zhandle->libzfs_sharehdl == NULL)
 853                 zhandle->libzfs_sharehdl = sa_init(service);
 854
 855         if (ret == SA_OK && zhandle->libzfs_sharehdl == NULL)
 856                 ret = SA_NO_MEMORY;
 857
 858         return (ret);
 859 }
 860
 861 /*
 862  * zfs_uninit_libshare(zhandle)
 863  *
 864  * Uninitialize the libshare API if it hasn't already been
 865  * uninitialized. It is OK to call multiple times.
 866  */
 867 void
 868 zfs_uninit_libshare(libzfs_handle_t *zhandle)
 869 {
 870         if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
 871                 sa_fini(zhandle->libzfs_sharehdl);
 872                 zhandle->libzfs_sharehdl = NULL;
 873         }
 874 }
 875
 876 /*
 877  * zfs_parse_options(options, proto)
 878  *
 879  * Call the legacy parse interface to get the protocol specific
 880  * options using the NULL arg to indicate that this is a "parse" only.
 881  */
 882 int
 883 zfs_parse_options(char *options, zfs_share_proto_t proto)
 884 {
 885         return (sa_parse_legacy_options(NULL, options,
 886             proto_table[proto].p_name));
 887 }
 888
 889 /*
 890  * Share the given filesystem according to the options in the specified
 891  * protocol specific properties (sharenfs, sharesmb).  We rely
 892  * on "libshare" to do the dirty work for us.
 893  */
 894 static int
 895 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
 896 {
 897         char mountpoint[ZFS_MAXPROPLEN];
 898         char shareopts[ZFS_MAXPROPLEN];
 899         char sourcestr[ZFS_MAXPROPLEN];
 900         libzfs_handle_t *hdl = zhp->zfs_hdl;
 901         sa_share_t share;
 902         zfs_share_proto_t *curr_proto;
 903         zprop_source_t sourcetype;
 904         int ret;
 905
 906         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
 907                 return (0);
 908
 909         for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
 910                 /*
 911                  * Return success if there are no share options.
 912                  */
 913                 if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
 914                     shareopts, sizeof (shareopts), &sourcetype, sourcestr,
 915                     ZFS_MAXPROPLEN, B_FALSE) != 0 ||
 916                     strcmp(shareopts, "off") == 0)
 917                         continue;
 918
 919                 ret = zfs_init_libshare(hdl, SA_INIT_SHARE_API);
 920                 if (ret != SA_OK) {
 921                         (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
 922                             dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
 923                             zfs_get_name(zhp), sa_errorstr(ret));
 924                         return (-1);
 925                 }
 926
 927                 /*
 928                  * If the 'zoned' property is set, then zfs_is_mountable()
 929                  * will have already bailed out if we are in the global zone.
 930                  * But local zones cannot be NFS servers, so we ignore it for
 931                  * local zones as well.
 932                  */
 933                 if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
 934                         continue;
 935
 936                 share = sa_find_share(hdl->libzfs_sharehdl, mountpoint);
 937                 if (share == NULL) {
 938                         /*
 939                          * This may be a new file system that was just
 940                          * created so isn't in the internal cache
 941                          * (second time through). Rather than
 942                          * reloading the entire configuration, we can
 943                          * assume ZFS has done the checking and it is
 944                          * safe to add this to the internal
 945                          * configuration.
 946                          */
 947                         if (sa_zfs_process_share(hdl->libzfs_sharehdl,
 948                             NULL, NULL, mountpoint,
 949                             proto_table[*curr_proto].p_name, sourcetype,
 950                             shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
 951                                 (void) zfs_error_fmt(hdl,
 952                                     proto_table[*curr_proto].p_share_err,
 953                                     dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 954                                     zfs_get_name(zhp));
 955                                 return (-1);
 956                         }
 957                         hdl->libzfs_shareflags |= ZFSSHARE_MISS;
 958                         share = sa_find_share(hdl->libzfs_sharehdl,
 959                             mountpoint);
 960                 }
 961                 if (share != NULL) {
 962                         int err;
 963                         err = sa_enable_share(share,
 964                             proto_table[*curr_proto].p_name);
 965                         if (err != SA_OK) {
 966                                 (void) zfs_error_fmt(hdl,
 967                                     proto_table[*curr_proto].p_share_err,
 968                                     dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 969                                     zfs_get_name(zhp));
 970                                 return (-1);
 971                         }
 972                 } else {
 973                         (void) zfs_error_fmt(hdl,
 974                             proto_table[*curr_proto].p_share_err,
 975                             dgettext(TEXT_DOMAIN, "cannot share '%s'"),
 976                             zfs_get_name(zhp));
 977                         return (-1);
 978                 }
 979
 980         }
 981         return (0);
 982 }
 983
 984
 985 int
 986 zfs_share_nfs(zfs_handle_t *zhp)
 987 {
 988         return (zfs_share_proto(zhp, nfs_only));
 989 }
 990
 991 int
 992 zfs_share_smb(zfs_handle_t *zhp)
 993 {
 994         return (zfs_share_proto(zhp, smb_only));
 995 }
 996
 997 int
 998 zfs_shareall(zfs_handle_t *zhp)
 999 {
1000         return (zfs_share_proto(zhp, share_all_proto));
1001 }
1002
1003 /*
1004  * Unshare a filesystem by mountpoint.
1005  */
1006 static int
1007 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
1008     zfs_share_proto_t proto)
1009 {
1010         sa_share_t share;
1011         int err;
1012         char *mntpt;
1013         /*
1014          * Mountpoint could get trashed if libshare calls getmntany
1015          * which it does during API initialization, so strdup the
1016          * value.
1017          */
1018         mntpt = zfs_strdup(hdl, mountpoint);
1019
1020         /* make sure libshare initialized */
1021         if ((err = zfs_init_libshare(hdl, SA_INIT_SHARE_API)) != SA_OK) {
1022                 free(mntpt);    /* don't need the copy anymore */
1023                 return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
1024                     dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
1025                     name, sa_errorstr(err)));
1026         }
1027
1028         share = sa_find_share(hdl->libzfs_sharehdl, mntpt);
1029         free(mntpt);    /* don't need the copy anymore */
1030
1031         if (share != NULL) {
1032                 err = sa_disable_share(share, proto_table[proto].p_name);
1033                 if (err != SA_OK) {
1034                         return (zfs_error_fmt(hdl,
1035                             proto_table[proto].p_unshare_err,
1036                             dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
1037                             name, sa_errorstr(err)));
1038                 }
1039         } else {
1040                 return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
1041                     dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
1042                     name));
1043         }
1044         return (0);
1045 }
1046
1047 /*
1048  * Unshare the given filesystem.
1049  */
1050 int
1051 zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
1052     zfs_share_proto_t *proto)
1053 {
1054         libzfs_handle_t *hdl = zhp->zfs_hdl;
1055         struct mnttab entry;
1056         char *mntpt = NULL;
1057
1058         /* check to see if need to unmount the filesystem */
1059         if (mountpoint != NULL)
1060                 mntpt = zfs_strdup(hdl, mountpoint);
1061
1062         if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
1063             libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
1064                 zfs_share_proto_t *curr_proto;
1065
1066                 if (mountpoint == NULL)
1067                         mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
1068
1069                 for (curr_proto = proto; *curr_proto != PROTO_END;
1070                     curr_proto++) {
1071
1072                         if (is_shared(hdl, mntpt, *curr_proto) &&
1073                             unshare_one(hdl, zhp->zfs_name,
1074                             mntpt, *curr_proto) != 0) {
1075                                 if (mntpt != NULL)
1076                                         free(mntpt);
1077                                 return (-1);
1078                         }
1079                 }
1080         }
1081         if (mntpt != NULL)
1082                 free(mntpt);
1083
1084         return (0);
1085 }
1086
1087 int
1088 zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
1089 {
1090         return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1091 }
1092
1093 int
1094 zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
1095 {
1096         return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1097 }
1098
1099 /*
1100  * Same as zfs_unmountall(), but for NFS and SMB unshares.
1101  */
1102 int
1103 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
1104 {
1105         prop_changelist_t *clp;
1106         int ret;
1107
1108         clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
1109         if (clp == NULL)
1110                 return (-1);
1111
1112         ret = changelist_unshare(clp, proto);
1113         changelist_free(clp);
1114
1115         return (ret);
1116 }
1117
1118 int
1119 zfs_unshareall_nfs(zfs_handle_t *zhp)
1120 {
1121         return (zfs_unshareall_proto(zhp, nfs_only));
1122 }
1123
1124 int
1125 zfs_unshareall_smb(zfs_handle_t *zhp)
1126 {
1127         return (zfs_unshareall_proto(zhp, smb_only));
1128 }
1129
1130 int
1131 zfs_unshareall(zfs_handle_t *zhp)
1132 {
1133         return (zfs_unshareall_proto(zhp, share_all_proto));
1134 }
1135
1136 int
1137 zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
1138 {
1139         return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1140 }
1141
1142 int
1143 zfs_unshareall_bytype(zfs_handle_t *zhp, const char *mountpoint,
1144     const char *proto)
1145 {
1146         if (proto == NULL)
1147                 return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1148         if (strcmp(proto, "nfs") == 0)
1149                 return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1150         else if (strcmp(proto, "smb") == 0)
1151                 return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1152         else
1153                 return (1);
1154 }
1155
1156 /*
1157  * Remove the mountpoint associated with the current dataset, if necessary.
1158  * We only remove the underlying directory if:
1159  *
1160  *      - The mountpoint is not 'none' or 'legacy'
1161  *      - The mountpoint is non-empty
1162  *      - The mountpoint is the default or inherited
1163  *      - The 'zoned' property is set, or we're in a local zone
1164  *
1165  * Any other directories we leave alone.
1166  */
1167 void
1168 remove_mountpoint(zfs_handle_t *zhp)
1169 {
1170         char mountpoint[ZFS_MAXPROPLEN];
1171         zprop_source_t source;
1172
1173         if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), &source, 0))
1174                 return;
1175
1176         if (source == ZPROP_SRC_DEFAULT ||
1177             source == ZPROP_SRC_INHERITED) {
1178                 /*
1179                  * Try to remove the directory, silently ignoring any errors.
1180                  * The filesystem may have since been removed or moved around,
1181                  * and this error isn't really useful to the administrator in
1182                  * any way.
1183                  */
1184                 (void) rmdir(mountpoint);
1185         }
1186 }
1187
1188 /*
1189  * Add the given zfs handle to the cb_handles array, dynamically reallocating
1190  * the array if it is out of space.
1191  */
1192 void
1193 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
1194 {
1195         if (cbp->cb_alloc == cbp->cb_used) {
1196                 size_t newsz;
1197                 zfs_handle_t **newhandles;
1198
1199                 newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
1200                 newhandles = zfs_realloc(zhp->zfs_hdl,
1201                     cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
1202                     newsz * sizeof (zfs_handle_t *));
1203                 cbp->cb_handles = newhandles;
1204                 cbp->cb_alloc = newsz;
1205         }
1206         cbp->cb_handles[cbp->cb_used++] = zhp;
1207 }
1208
1209 /*
1210  * Recursive helper function used during file system enumeration
1211  */
1212 static int
1213 zfs_iter_cb(zfs_handle_t *zhp, void *data)
1214 {
1215         get_all_cb_t *cbp = data;
1216
1217         if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
1218                 zfs_close(zhp);
1219                 return (0);
1220         }
1221
1222         if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
1223                 zfs_close(zhp);
1224                 return (0);
1225         }
1226
1227         if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1228             ZFS_KEYSTATUS_UNAVAILABLE) {
1229                 zfs_close(zhp);
1230                 return (0);
1231         }
1232
1233         /*
1234          * If this filesystem is inconsistent and has a receive resume
1235          * token, we can not mount it.
1236          */
1237         if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
1238             zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1239             NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
1240                 zfs_close(zhp);
1241                 return (0);
1242         }
1243
1244         libzfs_add_handle(cbp, zhp);
1245         if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
1246                 zfs_close(zhp);
1247                 return (-1);
1248         }
1249         return (0);
1250 }
1251
1252 /*
1253  * Sort comparator that compares two mountpoint paths. We sort these paths so
1254  * that subdirectories immediately follow their parents. This means that we
1255  * effectively treat the '/' character as the lowest value non-nul char.
1256  * Since filesystems from non-global zones can have the same mountpoint
1257  * as other filesystems, the comparator sorts global zone filesystems to
1258  * the top of the list. This means that the global zone will traverse the
1259  * filesystem list in the correct order and can stop when it sees the
1260  * first zoned filesystem. In a non-global zone, only the delegated
1261  * filesystems are seen.
1262  *
1263  * An example sorted list using this comparator would look like:
1264  *
1265  * /foo
1266  * /foo/bar
1267  * /foo/bar/baz
1268  * /foo/baz
1269  * /foo.bar
1270  * /foo (NGZ1)
1271  * /foo (NGZ2)
1272  *
1273  * The mounting code depends on this ordering to deterministically iterate
1274  * over filesystems in order to spawn parallel mount tasks.
1275  */
1276 static int
1277 mountpoint_cmp(const void *arga, const void *argb)
1278 {
1279         zfs_handle_t *const *zap = arga;
1280         zfs_handle_t *za = *zap;
1281         zfs_handle_t *const *zbp = argb;
1282         zfs_handle_t *zb = *zbp;
1283         char mounta[MAXPATHLEN];
1284         char mountb[MAXPATHLEN];
1285         const char *a = mounta;
1286         const char *b = mountb;
1287         boolean_t gota, gotb;
1288         uint64_t zoneda, zonedb;
1289
1290         zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
1291         zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
1292         if (zoneda && !zonedb)
1293                 return (1);
1294         if (!zoneda && zonedb)
1295                 return (-1);
1296
1297         gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
1298         if (gota) {
1299                 verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
1300                     sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1301         }
1302         gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1303         if (gotb) {
1304                 verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1305                     sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1306         }
1307
1308         if (gota && gotb) {
1309                 while (*a != '\0' && (*a == *b)) {
1310                         a++;
1311                         b++;
1312                 }
1313                 if (*a == *b)
1314                         return (0);
1315                 if (*a == '\0')
1316                         return (-1);
1317                 if (*b == '\0')
1318                         return (1);
1319                 if (*a == '/')
1320                         return (-1);
1321                 if (*b == '/')
1322                         return (1);
1323                 return (*a < *b ? -1 : *a > *b);
1324         }
1325
1326         if (gota)
1327                 return (-1);
1328         if (gotb)
1329                 return (1);
1330
1331         /*
1332          * If neither filesystem has a mountpoint, revert to sorting by
1333          * dataset name.
1334          */
1335         return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1336 }
1337
/*
 * Return true if path2 is a child of path1, i.e. path2 consists of path1, a
 * '/' separator and at least one more character.  A path is not a child of
 * itself.
 *
 * When path1 already ends in '/' (most importantly the root mountpoint "/")
 * that trailing character is the separator, so "/foo" is a child of "/".
 */
static boolean_t
libzfs_path_contains(const char *path1, const char *path2)
{
        size_t len = strlen(path1);

        /* path2 must start with all of path1. */
        if (strncmp(path1, path2, len) != 0)
                return (B_FALSE);

        /* path1 supplies the separator itself; something must follow it. */
        if (len > 0 && path1[len - 1] == '/')
                return (path2[len] != '\0' ? B_TRUE : B_FALSE);

        /* Otherwise the separator has to come next ("/foo" vs "/foo.bar"). */
        return (path2[len] == '/' ? B_TRUE : B_FALSE);
}
1346
1347 /*
1348  * Given a mountpoint specified by idx in the handles array, find the first
1349  * non-descendent of that mountpoint and return its index. Descendant paths
1350  * start with the parent's path. This function relies on the ordering
1351  * enforced by mountpoint_cmp().
1352  */
1353 static int
1354 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1355 {
1356         char parent[ZFS_MAXPROPLEN];
1357         char child[ZFS_MAXPROPLEN];
1358         int i;
1359
1360         verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1361             sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1362
1363         for (i = idx + 1; i < num_handles; i++) {
1364                 verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1365                     sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1366                 if (!libzfs_path_contains(parent, child))
1367                         break;
1368         }
1369         return (i);
1370 }
1371
1372 typedef struct mnt_param {
1373         libzfs_handle_t *mnt_hdl;
1374         tpool_t         *mnt_tp;
1375         zfs_handle_t    **mnt_zhps; /* filesystems to mount */
1376         size_t          mnt_num_handles;
1377         int             mnt_idx;        /* Index of selected entry to mount */
1378         zfs_iter_f      mnt_func;
1379         void            *mnt_data;
1380 } mnt_param_t;
1381
1382 /*
1383  * Allocate and populate the parameter struct for mount function, and
1384  * schedule mounting of the entry selected by idx.
1385  */
1386 static void
1387 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1388     size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
1389 {
1390         mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1391
1392         mnt_param->mnt_hdl = hdl;
1393         mnt_param->mnt_tp = tp;
1394         mnt_param->mnt_zhps = handles;
1395         mnt_param->mnt_num_handles = num_handles;
1396         mnt_param->mnt_idx = idx;
1397         mnt_param->mnt_func = func;
1398         mnt_param->mnt_data = data;
1399
1400         (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param);
1401 }
1402
/*
 * State shared by the mount and share passes of zpool_enable_datasets().
 * A pointer to this structure is the opaque argument given to
 * zfs_mount_one() and zfs_share_one().
 */
typedef struct mount_state {
        /*
         * ms_mntstatus is set to -1 if any mount (or share) fails. While
         * multiple threads could update this variable concurrently, no
         * synchronization is needed as it's only ever set to -1.
         */
        int             ms_mntstatus;
        int             ms_mntflags;    /* flags passed to zfs_mount() */
        const char      *ms_mntopts;    /* options passed to zfs_mount() */
} mount_state_t;
1417
1418 static int
1419 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1420 {
1421         mount_state_t *ms = arg;
1422         int ret = 0;
1423
1424         /*
1425          * don't attempt to mount encrypted datasets with
1426          * unloaded keys
1427          */
1428         if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1429             ZFS_KEYSTATUS_UNAVAILABLE)
1430                 return (0);
1431
1432         if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1433                 ret = ms->ms_mntstatus = -1;
1434         return (ret);
1435 }
1436
1437 static int
1438 zfs_share_one(zfs_handle_t *zhp, void *arg)
1439 {
1440         mount_state_t *ms = arg;
1441         int ret = 0;
1442
1443         if (zfs_share(zhp) != 0)
1444                 ret = ms->ms_mntstatus = -1;
1445         return (ret);
1446 }
1447
1448 /*
1449  * Thread pool function to mount one file system. On completion, it finds and
1450  * schedules its children to be mounted. This depends on the sorting done in
1451  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1452  * each descending from the previous) will have no parallelism since we always
1453  * have to wait for the parent to finish mounting before we can schedule
1454  * its children.
1455  */
1456 static void
1457 zfs_mount_task(void *arg)
1458 {
1459         mnt_param_t *mp = arg;
1460         int idx = mp->mnt_idx;
1461         zfs_handle_t **handles = mp->mnt_zhps;
1462         size_t num_handles = mp->mnt_num_handles;
1463         char mountpoint[ZFS_MAXPROPLEN];
1464
1465         verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1466             sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1467
1468         if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1469                 return;
1470
1471         /*
1472          * We dispatch tasks to mount filesystems with mountpoints underneath
1473          * this one. We do this by dispatching the next filesystem with a
1474          * descendant mountpoint of the one we just mounted, then skip all of
1475          * its descendants, dispatch the next descendant mountpoint, and so on.
1476          * The non_descendant_idx() function skips over filesystems that are
1477          * descendants of the filesystem we just dispatched.
1478          */
1479         for (int i = idx + 1; i < num_handles;
1480             i = non_descendant_idx(handles, num_handles, i)) {
1481                 char child[ZFS_MAXPROPLEN];
1482                 verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1483                     child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1484
1485                 if (!libzfs_path_contains(mountpoint, child))
1486                         break; /* not a descendant, return */
1487                 zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1488                     mp->mnt_func, mp->mnt_data, mp->mnt_tp);
1489         }
1490         free(mp);
1491 }
1492
1493 /*
1494  * Issue the func callback for each ZFS handle contained in the handles
1495  * array. This function is used to mount all datasets, and so this function
1496  * guarantees that filesystems for parent mountpoints are called before their
1497  * children. As such, before issuing any callbacks, we first sort the array
1498  * of handles by mountpoint.
1499  *
1500  * Callbacks are issued in one of two ways:
1501  *
1502  * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
1503  *    environment variable is set, then we issue callbacks sequentially.
1504  *
1505  * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
1506  *    environment variable is not set, then we use a tpool to dispatch threads
1507  *    to mount filesystems in parallel. This function dispatches tasks to mount
1508  *    the filesystems at the top-level mountpoints, and these tasks in turn
1509  *    are responsible for recursively mounting filesystems in their children
1510  *    mountpoints.
1511  */
1512 void
1513 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1514     size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
1515 {
1516         zoneid_t zoneid = getzoneid();
1517
1518         /*
1519          * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1520          * variable that can be used as a convenience to do a/b comparison
1521          * of serial vs. parallel mounting.
1522          */
1523         boolean_t serial_mount = !parallel ||
1524             (getenv("ZFS_SERIAL_MOUNT") != NULL);
1525
1526         /*
1527          * Sort the datasets by mountpoint. See mountpoint_cmp for details
1528          * of how these are sorted.
1529          */
1530         qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1531
1532         if (serial_mount) {
1533                 for (int i = 0; i < num_handles; i++) {
1534                         func(handles[i], data);
1535                 }
1536                 return;
1537         }
1538
1539         /*
1540          * Issue the callback function for each dataset using a parallel
1541          * algorithm that uses a thread pool to manage threads.
1542          */
1543         tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL);
1544
1545         /*
1546          * There may be multiple "top level" mountpoints outside of the pool's
1547          * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1548          * these.
1549          */
1550         for (int i = 0; i < num_handles;
1551             i = non_descendant_idx(handles, num_handles, i)) {
1552                 /*
1553                  * Since the mountpoints have been sorted so that the zoned
1554                  * filesystems are at the end, a zoned filesystem seen from
1555                  * the global zone means that we're done.
1556                  */
1557                 if (zoneid == GLOBAL_ZONEID &&
1558                     zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1559                         break;
1560                 zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1561                     tp);
1562         }
1563
1564         tpool_wait(tp); /* wait for all scheduled mounts to complete */
1565         tpool_destroy(tp);
1566 }
1567
1568 /*
1569  * Mount and share all datasets within the given pool.  This assumes that no
1570  * datasets within the pool are currently mounted.
1571  */
1572 #pragma weak zpool_mount_datasets = zpool_enable_datasets
1573 int
1574 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1575 {
1576         get_all_cb_t cb = { 0 };
1577         mount_state_t ms = { 0 };
1578         zfs_handle_t *zfsp;
1579         int ret = 0;
1580
1581         if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1582             ZFS_TYPE_DATASET)) == NULL)
1583                 goto out;
1584
1585         /*
1586          * Gather all non-snapshot datasets within the pool. Start by adding
1587          * the root filesystem for this pool to the list, and then iterate
1588          * over all child filesystems.
1589          */
1590         libzfs_add_handle(&cb, zfsp);
1591         if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
1592                 goto out;
1593
1594         /*
1595          * Mount all filesystems
1596          */
1597         ms.ms_mntopts = mntopts;
1598         ms.ms_mntflags = flags;
1599         zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1600             zfs_mount_one, &ms, B_TRUE);
1601         if (ms.ms_mntstatus != 0)
1602                 ret = ms.ms_mntstatus;
1603
1604         /*
1605          * Share all filesystems that need to be shared. This needs to be
1606          * a separate pass because libshare is not mt-safe, and so we need
1607          * to share serially.
1608          */
1609         ms.ms_mntstatus = 0;
1610         zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1611             zfs_share_one, &ms, B_FALSE);
1612         if (ms.ms_mntstatus != 0)
1613                 ret = ms.ms_mntstatus;
1614
1615 out:
1616         for (int i = 0; i < cb.cb_used; i++)
1617                 zfs_close(cb.cb_handles[i]);
1618         free(cb.cb_handles);
1619
1620         return (ret);
1621 }
1622
/*
 * qsort() comparator for an array of mountpoint strings.  The arguments to
 * strcmp() are swapped, so the array ends up in descending order.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
        const char *const *lhs = a;
        const char *const *rhs = b;

        return (strcmp(*rhs, *lhs));
}
1631
1632 /* alias for 2002/240 */
1633 #pragma weak zpool_unmount_datasets = zpool_disable_datasets
1634 /*
1635  * Unshare and unmount all datasets within the given pool.  We don't want to
1636  * rely on traversing the DSL to discover the filesystems within the pool,
1637  * because this may be expensive (if not all of them are mounted), and can fail
1638  * arbitrarily (on I/O error, for example).  Instead, we walk /proc/self/mounts
1639  * and gather all the filesystems that are currently mounted.
1640  */
1641 int
1642 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1643 {
1644         int used, alloc;
1645         struct mnttab entry;
1646         size_t namelen;
1647         char **mountpoints = NULL;
1648         zfs_handle_t **datasets = NULL;
1649         libzfs_handle_t *hdl = zhp->zpool_hdl;
1650         int i;
1651         int ret = -1;
1652         int flags = (force ? MS_FORCE : 0);
1653
1654         namelen = strlen(zhp->zpool_name);
1655
1656         /* Reopen MNTTAB to prevent reading stale data from open file */
1657         if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
1658                 return (ENOENT);
1659
1660         used = alloc = 0;
1661         while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
1662                 /*
1663                  * Ignore non-ZFS entries.
1664                  */
1665                 if (entry.mnt_fstype == NULL ||
1666                     strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1667                         continue;
1668
1669                 /*
1670                  * Ignore filesystems not within this pool.
1671                  */
1672                 if (entry.mnt_mountp == NULL ||
1673                     strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1674                     (entry.mnt_special[namelen] != '/' &&
1675                     entry.mnt_special[namelen] != '\0'))
1676                         continue;
1677
1678                 /*
1679                  * At this point we've found a filesystem within our pool.  Add
1680                  * it to our growing list.
1681                  */
1682                 if (used == alloc) {
1683                         if (alloc == 0) {
1684                                 if ((mountpoints = zfs_alloc(hdl,
1685                                     8 * sizeof (void *))) == NULL)
1686                                         goto out;
1687
1688                                 if ((datasets = zfs_alloc(hdl,
1689                                     8 * sizeof (void *))) == NULL)
1690                                         goto out;
1691
1692                                 alloc = 8;
1693                         } else {
1694                                 void *ptr;
1695
1696                                 if ((ptr = zfs_realloc(hdl, mountpoints,
1697                                     alloc * sizeof (void *),
1698                                     alloc * 2 * sizeof (void *))) == NULL)
1699                                         goto out;
1700                                 mountpoints = ptr;
1701
1702                                 if ((ptr = zfs_realloc(hdl, datasets,
1703                                     alloc * sizeof (void *),
1704                                     alloc * 2 * sizeof (void *))) == NULL)
1705                                         goto out;
1706                                 datasets = ptr;
1707
1708                                 alloc *= 2;
1709                         }
1710                 }
1711
1712                 if ((mountpoints[used] = zfs_strdup(hdl,
1713                     entry.mnt_mountp)) == NULL)
1714                         goto out;
1715
1716                 /*
1717                  * This is allowed to fail, in case there is some I/O error.  It
1718                  * is only used to determine if we need to remove the underlying
1719                  * mountpoint, so failure is not fatal.
1720                  */
1721                 datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
1722
1723                 used++;
1724         }
1725
1726         /*
1727          * At this point, we have the entire list of filesystems, so sort it by
1728          * mountpoint.
1729          */
1730         qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
1731
1732         /*
1733          * Walk through and first unshare everything.
1734          */
1735         for (i = 0; i < used; i++) {
1736                 zfs_share_proto_t *curr_proto;
1737                 for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
1738                     curr_proto++) {
1739                         if (is_shared(hdl, mountpoints[i], *curr_proto) &&
1740                             unshare_one(hdl, mountpoints[i],
1741                             mountpoints[i], *curr_proto) != 0)
1742                                 goto out;
1743                 }
1744         }
1745
1746         /*
1747          * Now unmount everything, removing the underlying directories as
1748          * appropriate.
1749          */
1750         for (i = 0; i < used; i++) {
1751                 if (unmount_one(hdl, mountpoints[i], flags) != 0)
1752                         goto out;
1753         }
1754
1755         for (i = 0; i < used; i++) {
1756                 if (datasets[i])
1757                         remove_mountpoint(datasets[i]);
1758         }
1759
1760         ret = 0;
1761 out:
1762         for (i = 0; i < used; i++) {
1763                 if (datasets[i])
1764                         zfs_close(datasets[i]);
1765                 free(mountpoints[i]);
1766         }
1767         free(datasets);
1768         free(mountpoints);
1769
1770         return (ret);
1771 }