sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27
  28 /*
  29  * ZFS control directory (a.k.a. ".zfs")
  30  *
  31  * This directory provides a common location for all ZFS meta-objects.
  32  * Currently, this is only the 'snapshot' directory, but this may expand in the
  33  * future.  The elements are built using the GFS primitives, as the hierarchy
  34  * does not actually exist on disk.
  35  *
  36  * For 'snapshot', we don't want to have all snapshots always mounted, because
  37  * this would take up a huge amount of space in /etc/mnttab.  We have three
  38  * types of objects:
  39  *
  40  *      ctldir ------> snapshotdir -------> snapshot
  41  *                                             |
  42  *                                             |
  43  *                                             V
  44  *                                         mounted fs
  45  *
  46  * The 'snapshot' node contains just enough information to lookup '..' and act
  47  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
  48  * perform an automount of the underlying filesystem and return the
  49  * corresponding vnode.
  50  *
  51  * All mounts are handled automatically by the kernel, but unmounts are
  52  * (currently) handled from user land.  The main reason is that there is no
  53  * reliable way to auto-unmount the filesystem when it's "no longer in use".
  54  * When the user unmounts a filesystem, we call zfsctl_unmount(), which
  55  * unmounts any snapshots within the snapshot directory.
  56  */
  57
  58 #include <sys/zfs_context.h>
  59 #include <sys/zfs_ctldir.h>
  60 #include <sys/zfs_ioctl.h>
  61 #include <sys/zfs_vfsops.h>
  62 #include <sys/namei.h>
  63 #include <sys/gfs.h>
  64 #include <sys/stat.h>
  65 #include <sys/dmu.h>
  66 #include <sys/mount.h>
  67
  68 typedef struct {
  69         char            *se_name;
  70         vnode_t         *se_root;
  71         avl_node_t      se_node;
  72 } zfs_snapentry_t;
  73
  74 static int
  75 snapentry_compare(const void *a, const void *b)
  76 {
  77         const zfs_snapentry_t *sa = a;
  78         const zfs_snapentry_t *sb = b;
  79         int ret = strcmp(sa->se_name, sb->se_name);
  80
  81         if (ret < 0)
  82                 return (-1);
  83         else if (ret > 0)
  84                 return (1);
  85         else
  86                 return (0);
  87 }
  88
  89 static struct vop_vector zfsctl_ops_root;
  90 static struct vop_vector zfsctl_ops_snapdir;
  91 static struct vop_vector zfsctl_ops_snapshot;
  92
  93 static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
  94 static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
  95
  96 typedef struct zfsctl_node {
  97         gfs_dir_t       zc_gfs_private;
  98         uint64_t        zc_id;
  99         timestruc_t     zc_cmtime;      /* ctime and mtime, always the same */
 100 } zfsctl_node_t;
 101
 102 typedef struct zfsctl_snapdir {
 103         zfsctl_node_t   sd_node;
 104         kmutex_t        sd_lock;
 105         avl_tree_t      sd_snaps;
 106 } zfsctl_snapdir_t;
 107
 108 /*
 109  * Root directory elements.  We have only a single static entry, 'snapshot'.
 110  */
 111 static gfs_dirent_t zfsctl_root_entries[] = {
 112         { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
 113         { NULL }
 114 };
 115
 116 /* include . and .. in the calculation */
 117 #define NROOT_ENTRIES   ((sizeof (zfsctl_root_entries) / \
 118     sizeof (gfs_dirent_t)) + 1)
 119
 120
 121 /*
 122  * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 123  * directories.  This is called from the ZFS init routine, and initializes the
 124  * vnode ops vectors that we'll be using.
 125  */
 126 void
 127 zfsctl_init(void)
 128 {
 129 }
 130
 131 void
 132 zfsctl_fini(void)
 133 {
 134 }
 135
 136 /*
 137  * Return the inode number associated with the 'snapshot' directory.
 138  */
 139 /* ARGSUSED */
 140 static ino64_t
 141 zfsctl_root_inode_cb(vnode_t *vp, int index)
 142 {
 143         ASSERT(index == 0);
 144         return (ZFSCTL_INO_SNAPDIR);
 145 }
 146
 147 /*
 148  * Create the '.zfs' directory.  This directory is cached as part of the VFS
 149  * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 150  * therefore checks against a vfs_count of 2 instead of 1.  This reference
 151  * is removed when the ctldir is destroyed in the unmount.
 152  */
 153 void
 154 zfsctl_create(zfsvfs_t *zfsvfs)
 155 {
 156         vnode_t *vp, *rvp;
 157         zfsctl_node_t *zcp;
 158
 159         ASSERT(zfsvfs->z_ctldir == NULL);
 160
 161         vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
 162             &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
 163             zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
 164         zcp = vp->v_data;
 165         zcp->zc_id = ZFSCTL_INO_ROOT;
 166
 167         VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp, curthread) == 0);
 168         ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
 169         VN_URELE(rvp);
 170
 171         /*
 172          * We're only faking the fact that we have a root of a filesystem for
 173          * the sake of the GFS interfaces.  Undo the flag manipulation it did
 174          * for us.
 175          */
 176         vp->v_vflag &= ~VV_ROOT;
 177
 178         zfsvfs->z_ctldir = vp;
 179
 180         VOP_UNLOCK(vp, 0, curthread);
 181 }
 182
 183 /*
 184  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
 185  * There might still be more references if we were force unmounted, but only
 186  * new zfs_inactive() calls can occur and they don't reference .zfs
 187  */
 188 void
 189 zfsctl_destroy(zfsvfs_t *zfsvfs)
 190 {
 191         VN_RELE(zfsvfs->z_ctldir);
 192         zfsvfs->z_ctldir = NULL;
 193 }
 194
 195 /*
 196  * Given a root znode, retrieve the associated .zfs directory.
 197  * Add a hold to the vnode and return it.
 198  */
 199 vnode_t *
 200 zfsctl_root(znode_t *zp)
 201 {
 202         ASSERT(zfs_has_ctldir(zp));
 203         VN_HOLD(zp->z_zfsvfs->z_ctldir);
 204         return (zp->z_zfsvfs->z_ctldir);
 205 }
 206
 207 /*
 208  * Common open routine.  Disallow any write access.
 209  */
 210 /* ARGSUSED */
 211 static int
 212 zfsctl_common_open(struct vop_open_args *ap)
 213 {
 214         int flags = ap->a_mode;
 215
 216         if (flags & FWRITE)
 217                 return (EACCES);
 218
 219         return (0);
 220 }
 221
 222 /*
 223  * Common close routine.  Nothing to do here.
 224  */
 225 /* ARGSUSED */
 226 static int
 227 zfsctl_common_close(struct vop_close_args *ap)
 228 {
 229         return (0);
 230 }
 231
 232 /*
 233  * Common access routine.  Disallow writes.
 234  */
 235 /* ARGSUSED */
 236 static int
 237 zfsctl_common_access(ap)
 238         struct vop_access_args /* {
 239                 struct vnode *a_vp;
 240                 int  a_mode;
 241                 struct ucred *a_cred;
 242                 struct thread *a_td;
 243         } */ *ap;
 244 {
 245         int mode = ap->a_mode;
 246
 247         if (mode & VWRITE)
 248                 return (EACCES);
 249
 250         return (0);
 251 }
 252
 253 /*
 254  * Common getattr function.  Fill in basic information.
 255  */
 256 static void
 257 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
 258 {
 259         zfsctl_node_t   *zcp = vp->v_data;
 260         timestruc_t     now;
 261
 262         vap->va_uid = 0;
 263         vap->va_gid = 0;
 264         vap->va_rdev = 0;
 265         /*
 266          * We are a purly virtual object, so we have no
 267          * blocksize or allocated blocks.
 268          */
 269         vap->va_blksize = 0;
 270         vap->va_nblocks = 0;
 271         vap->va_seq = 0;
 272         vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 273         vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
 274             S_IROTH | S_IXOTH;
 275         vap->va_type = VDIR;
 276         /*
 277          * We live in the now (for atime).
 278          */
 279         gethrestime(&now);
 280         vap->va_atime = now;
 281         vap->va_mtime = vap->va_ctime = vap->va_birthtime = zcp->zc_cmtime;
 282         /* FreeBSD: Reset chflags(2) flags. */
 283         vap->va_flags = 0;
 284 }
 285
 286 static int
 287 zfsctl_common_fid(ap)
 288         struct vop_fid_args /* {
 289                 struct vnode *a_vp;
 290                 struct fid *a_fid;
 291         } */ *ap;
 292 {
 293         vnode_t         *vp = ap->a_vp;
 294         fid_t           *fidp = (void *)ap->a_fid;
 295         zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
 296         zfsctl_node_t   *zcp = vp->v_data;
 297         uint64_t        object = zcp->zc_id;
 298         zfid_short_t    *zfid;
 299         int             i;
 300
 301         ZFS_ENTER(zfsvfs);
 302
 303         fidp->fid_len = SHORT_FID_LEN;
 304
 305         zfid = (zfid_short_t *)fidp;
 306
 307         zfid->zf_len = SHORT_FID_LEN;
 308
 309         for (i = 0; i < sizeof (zfid->zf_object); i++)
 310                 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 311
 312         /* .zfs znodes always have a generation number of 0 */
 313         for (i = 0; i < sizeof (zfid->zf_gen); i++)
 314                 zfid->zf_gen[i] = 0;
 315
 316         ZFS_EXIT(zfsvfs);
 317         return (0);
 318 }
 319
 320 static int
 321 zfsctl_common_reclaim(ap)
 322         struct vop_reclaim_args /* {
 323                 struct vnode *a_vp;
 324                 struct thread *a_td;
 325         } */ *ap;
 326 {
 327         vnode_t *vp = ap->a_vp;
 328
 329         /*
 330          * Destroy the vm object and flush associated pages.
 331          */
 332         vnode_destroy_vobject(vp);
 333         VI_LOCK(vp);
 334         vp->v_data = NULL;
 335         VI_UNLOCK(vp);
 336         return (0);
 337 }
 338
 339 /*
 340  * .zfs inode namespace
 341  *
 342  * We need to generate unique inode numbers for all files and directories
 343  * within the .zfs pseudo-filesystem.  We use the following scheme:
 344  *
 345  *      ENTRY                   ZFSCTL_INODE
 346  *      .zfs                    1
 347  *      .zfs/snapshot           2
 348  *      .zfs/snapshot/<snap>    objectid(snap)
 349  */
 350
 351 #define ZFSCTL_INO_SNAP(id)     (id)
 352
 353 /*
 354  * Get root directory attributes.
 355  */
 356 /* ARGSUSED */
 357 static int
 358 zfsctl_root_getattr(ap)
 359         struct vop_getattr_args /* {
 360                 struct vnode *a_vp;
 361                 struct vattr *a_vap;
 362                 struct ucred *a_cred;
 363                 struct thread *a_td;
 364         } */ *ap;
 365 {
 366         struct vnode *vp = ap->a_vp;
 367         struct vattr *vap = ap->a_vap;
 368         zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 369
 370         ZFS_ENTER(zfsvfs);
 371         vap->va_nodeid = ZFSCTL_INO_ROOT;
 372         vap->va_nlink = vap->va_size = NROOT_ENTRIES;
 373
 374         zfsctl_common_getattr(vp, vap);
 375         ZFS_EXIT(zfsvfs);
 376
 377         return (0);
 378 }
 379
 380 /*
 381  * Special case the handling of "..".
 382  */
 383 /* ARGSUSED */
 384 int
 385 zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 386     int flags, vnode_t *rdir, cred_t *cr)
 387 {
 388         zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 389         int err;
 390
 391         ZFS_ENTER(zfsvfs);
 392
 393         if (strcmp(nm, "..") == 0) {
 394                 err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp, curthread);
 395                 if (err == 0)
 396                         VOP_UNLOCK(*vpp, 0, curthread);
 397         } else {
 398                 err = gfs_dir_lookup(dvp, nm, vpp);
 399         }
 400
 401         ZFS_EXIT(zfsvfs);
 402
 403         return (err);
 404 }
 405
 406 /*
 407  * Special case the handling of "..".
 408  */
 409 /* ARGSUSED */
 410 int
 411 zfsctl_root_lookup_vop(ap)
 412         struct vop_lookup_args /* {
 413                 struct vnode *a_dvp;
 414                 struct vnode **a_vpp;
 415                 struct componentname *a_cnp;
 416         } */ *ap;
 417 {
 418         vnode_t *dvp = ap->a_dvp;
 419         vnode_t **vpp = ap->a_vpp;
 420         cred_t *cr = ap->a_cnp->cn_cred;
 421         int flags = ap->a_cnp->cn_flags;
 422         int nameiop = ap->a_cnp->cn_nameiop;
 423         char nm[NAME_MAX + 1];
 424         int err;
 425
 426         if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
 427                 return (EOPNOTSUPP);
 428
 429         ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
 430         strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
 431
 432         err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr);
 433         if (err == 0 && (nm[0] != '.' || nm[1] != '\0'))
 434                 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, ap->a_cnp->cn_thread);
 435
 436         return (err);
 437 }
 438
 439 static struct vop_vector zfsctl_ops_root = {
 440         .vop_default =  &default_vnodeops,
 441         .vop_open =     zfsctl_common_open,
 442         .vop_close =    zfsctl_common_close,
 443         .vop_ioctl =    VOP_EINVAL,
 444         .vop_getattr =  zfsctl_root_getattr,
 445         .vop_access =   zfsctl_common_access,
 446         .vop_readdir =  gfs_vop_readdir,
 447         .vop_lookup =   zfsctl_root_lookup_vop,
 448         .vop_inactive = gfs_vop_inactive,
 449         .vop_reclaim =  zfsctl_common_reclaim,
 450         .vop_fid =      zfsctl_common_fid,
 451 };
 452
 453 static int
 454 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
 455 {
 456         objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
 457
 458         dmu_objset_name(os, zname);
 459         if (strlen(zname) + 1 + strlen(name) >= len)
 460                 return (ENAMETOOLONG);
 461         (void) strcat(zname, "@");
 462         (void) strcat(zname, name);
 463         return (0);
 464 }
 465
 466 static int
 467 zfsctl_unmount_snap(vnode_t *dvp, const char *name, int force, cred_t *cr)
 468 {
 469         zfsctl_snapdir_t *sdp = dvp->v_data;
 470         zfs_snapentry_t search, *sep;
 471         struct vop_inactive_args ap;
 472         avl_index_t where;
 473         int err;
 474
 475         ASSERT(MUTEX_HELD(&sdp->sd_lock));
 476
 477         search.se_name = (char *)name;
 478         if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL)
 479                 return (ENOENT);
 480
 481         ASSERT(vn_ismntpt(sep->se_root));
 482
 483         /* this will be dropped by dounmount() */
 484         if ((err = vn_vfswlock(sep->se_root)) != 0)
 485                 return (err);
 486
 487         err = dounmount(vn_mountedvfs(sep->se_root), force, curthread);
 488         if (err)
 489                 return (err);
 490         ASSERT(sep->se_root->v_count == 1);
 491         ap.a_vp = sep->se_root;
 492         gfs_vop_inactive(&ap);
 493
 494         avl_remove(&sdp->sd_snaps, sep);
 495         kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 496         kmem_free(sep, sizeof (zfs_snapentry_t));
 497
 498         return (0);
 499 }
 500
 501 #if 0
 502 static void
 503 zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
 504 {
 505         avl_index_t where;
 506         vfs_t *vfsp;
 507         refstr_t *pathref;
 508         char newpath[MAXNAMELEN];
 509         char *tail;
 510
 511         ASSERT(MUTEX_HELD(&sdp->sd_lock));
 512         ASSERT(sep != NULL);
 513
 514         vfsp = vn_mountedvfs(sep->se_root);
 515         ASSERT(vfsp != NULL);
 516
 517         vfs_lock_wait(vfsp);
 518
 519         /*
 520          * Change the name in the AVL tree.
 521          */
 522         avl_remove(&sdp->sd_snaps, sep);
 523         kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 524         sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 525         (void) strcpy(sep->se_name, nm);
 526         VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
 527         avl_insert(&sdp->sd_snaps, sep, where);
 528
 529         /*
 530          * Change the current mountpoint info:
 531          *      - update the tail of the mntpoint path
 532          *      - update the tail of the resource path
 533          */
 534         pathref = vfs_getmntpoint(vfsp);
 535         (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 536         VERIFY((tail = strrchr(newpath, '/')) != NULL);
 537         *(tail+1) = '\0';
 538         ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 539         (void) strcat(newpath, nm);
 540         refstr_rele(pathref);
 541         vfs_setmntpoint(vfsp, newpath);
 542
 543         pathref = vfs_getresource(vfsp);
 544         (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 545         VERIFY((tail = strrchr(newpath, '@')) != NULL);
 546         *(tail+1) = '\0';
 547         ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 548         (void) strcat(newpath, nm);
 549         refstr_rele(pathref);
 550         vfs_setresource(vfsp, newpath);
 551
 552         vfs_unlock(vfsp);
 553 }
 554 #endif
 555
 556 #if 0
 557 static int
 558 zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
 559     cred_t *cr)
 560 {
 561         zfsctl_snapdir_t *sdp = sdvp->v_data;
 562         zfs_snapentry_t search, *sep;
 563         avl_index_t where;
 564         char from[MAXNAMELEN], to[MAXNAMELEN];
 565         int err;
 566
 567         err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
 568         if (err)
 569                 return (err);
 570         err = zfs_secpolicy_write(from, cr);
 571         if (err)
 572                 return (err);
 573
 574         /*
 575          * Cannot move snapshots out of the snapdir.
 576          */
 577         if (sdvp != tdvp)
 578                 return (EINVAL);
 579
 580         if (strcmp(snm, tnm) == 0)
 581                 return (0);
 582
 583         err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
 584         if (err)
 585                 return (err);
 586
 587         mutex_enter(&sdp->sd_lock);
 588
 589         search.se_name = (char *)snm;
 590         if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
 591                 mutex_exit(&sdp->sd_lock);
 592                 return (ENOENT);
 593         }
 594
 595         err = dmu_objset_rename(from, to, B_FALSE);
 596         if (err == 0)
 597                 zfsctl_rename_snap(sdp, sep, tnm);
 598
 599         mutex_exit(&sdp->sd_lock);
 600
 601         return (err);
 602 }
 603 #endif
 604
 605 #if 0
 606 /* ARGSUSED */
 607 static int
 608 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr)
 609 {
 610         zfsctl_snapdir_t *sdp = dvp->v_data;
 611         char snapname[MAXNAMELEN];
 612         int err;
 613
 614         err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
 615         if (err)
 616                 return (err);
 617         err = zfs_secpolicy_write(snapname, cr);
 618         if (err)
 619                 return (err);
 620
 621         mutex_enter(&sdp->sd_lock);
 622
 623         err = zfsctl_unmount_snap(dvp, name, 0, cr);
 624         if (err) {
 625                 mutex_exit(&sdp->sd_lock);
 626                 return (err);
 627         }
 628
 629         err = dmu_objset_destroy(snapname);
 630
 631         mutex_exit(&sdp->sd_lock);
 632
 633         return (err);
 634 }
 635 #endif
 636
 637 /*
 638  * Lookup entry point for the 'snapshot' directory.  Try to open the
 639  * snapshot if it exist, creating the pseudo filesystem vnode as necessary.
 640  * Perform a mount of the associated dataset on top of the vnode.
 641  */
 642 /* ARGSUSED */
 643 int
 644 zfsctl_snapdir_lookup(ap)
 645         struct vop_lookup_args /* {
 646                 struct vnode *a_dvp;
 647                 struct vnode **a_vpp;
 648                 struct componentname *a_cnp;
 649         } */ *ap;
 650 {
 651         vnode_t *dvp = ap->a_dvp;
 652         vnode_t **vpp = ap->a_vpp;
 653         char nm[NAME_MAX + 1];
 654         zfsctl_snapdir_t *sdp = dvp->v_data;
 655         objset_t *snap;
 656         char snapname[MAXNAMELEN];
 657         char *mountpoint;
 658         zfs_snapentry_t *sep, search;
 659         size_t mountpoint_len;
 660         avl_index_t where;
 661         zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 662         int err;
 663
 664         ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
 665         strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
 666
 667         ASSERT(dvp->v_type == VDIR);
 668
 669         if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
 670                 return (0);
 671
 672         *vpp = NULL;
 673
 674         /*
 675          * If we get a recursive call, that means we got called
 676          * from the domount() code while it was trying to look up the
 677          * spec (which looks like a local path for zfs).  We need to
 678          * add some flag to domount() to tell it not to do this lookup.
 679          */
 680         if (MUTEX_HELD(&sdp->sd_lock))
 681                 return (ENOENT);
 682
 683         ZFS_ENTER(zfsvfs);
 684
 685         mutex_enter(&sdp->sd_lock);
 686         search.se_name = (char *)nm;
 687         if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
 688                 *vpp = sep->se_root;
 689                 VN_HOLD(*vpp);
 690                 if ((*vpp)->v_mountedhere == NULL) {
 691                         /*
 692                          * The snapshot was unmounted behind our backs,
 693                          * try to remount it.
 694                          */
 695                         goto domount;
 696                 }
 697                 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, ap->a_cnp->cn_thread);
 698                 mutex_exit(&sdp->sd_lock);
 699                 ZFS_EXIT(zfsvfs);
 700                 return (0);
 701         }
 702
 703         /*
 704          * The requested snapshot is not currently mounted, look it up.
 705          */
 706         err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
 707         if (err) {
 708                 mutex_exit(&sdp->sd_lock);
 709                 ZFS_EXIT(zfsvfs);
 710                 return (err);
 711         }
 712         if (dmu_objset_open(snapname, DMU_OST_ZFS,
 713             DS_MODE_STANDARD | DS_MODE_READONLY, &snap) != 0) {
 714                 mutex_exit(&sdp->sd_lock);
 715                 ZFS_EXIT(zfsvfs);
 716                 return (ENOENT);
 717         }
 718
 719         sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
 720         sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 721         (void) strcpy(sep->se_name, nm);
 722         *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
 723         VN_HOLD(*vpp);
 724         avl_insert(&sdp->sd_snaps, sep, where);
 725
 726         dmu_objset_close(snap);
 727 domount:
 728         mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
 729             strlen("/.zfs/snapshot/") + strlen(nm) + 1;
 730         mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
 731         (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
 732             dvp->v_vfsp->mnt_stat.f_mntonname, nm);
 733         err = domount(curthread, *vpp, "zfs", mountpoint, snapname, 0);
 734         kmem_free(mountpoint, mountpoint_len);
 735         /* FreeBSD: This line was moved from below to avoid a lock recursion. */
 736         if (err == 0)
 737                 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread);
 738         mutex_exit(&sdp->sd_lock);
 739
 740         /*
 741          * If we had an error, drop our hold on the vnode and
 742          * zfsctl_snapshot_inactive() will clean up.
 743          */
 744         if (err) {
 745                 VN_RELE(*vpp);
 746                 *vpp = NULL;
 747         }
 748         return (err);
 749 }
 750
 751 /* ARGSUSED */
 752 static int
 753 zfsctl_snapdir_readdir_cb(vnode_t *vp, struct dirent64 *dp, int *eofp,
 754     offset_t *offp, offset_t *nextp, void *data)
 755 {
 756         zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 757         char snapname[MAXNAMELEN];
 758         uint64_t id, cookie;
 759
 760         ZFS_ENTER(zfsvfs);
 761
 762         cookie = *offp;
 763         if (dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
 764             &cookie) == ENOENT) {
 765                 *eofp = 1;
 766                 ZFS_EXIT(zfsvfs);
 767                 return (0);
 768         }
 769
 770         (void) strcpy(dp->d_name, snapname);
 771         dp->d_ino = ZFSCTL_INO_SNAP(id);
 772         *nextp = cookie;
 773
 774         ZFS_EXIT(zfsvfs);
 775
 776         return (0);
 777 }
 778
 779 vnode_t *
 780 zfsctl_mknode_snapdir(vnode_t *pvp)
 781 {
 782         vnode_t *vp;
 783         zfsctl_snapdir_t *sdp;
 784
 785         vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, pvp->v_vfsp,
 786             &zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
 787             zfsctl_snapdir_readdir_cb, NULL);
 788         sdp = vp->v_data;
 789         sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
 790         sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
 791         mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
 792         avl_create(&sdp->sd_snaps, snapentry_compare,
 793             sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
 794         VOP_UNLOCK(vp, 0, curthread);
 795         return (vp);
 796 }
 797
 798 /* ARGSUSED */
 799 static int
 800 zfsctl_snapdir_getattr(ap)
 801         struct vop_getattr_args /* {
 802                 struct vnode *a_vp;
 803                 struct vattr *a_vap;
 804                 struct ucred *a_cred;
 805                 struct thread *a_td;
 806         } */ *ap;
 807 {
 808         struct vnode *vp = ap->a_vp;
 809         struct vattr *vap = ap->a_vap;
 810         zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 811         zfsctl_snapdir_t *sdp = vp->v_data;
 812
 813         ZFS_ENTER(zfsvfs);
 814         zfsctl_common_getattr(vp, vap);
 815         vap->va_nodeid = gfs_file_inode(vp);
 816         vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
 817         ZFS_EXIT(zfsvfs);
 818
 819         return (0);
 820 }
 821
 822 /* ARGSUSED */
 823 static int
 824 zfsctl_snapdir_inactive(ap)
 825         struct vop_inactive_args /* {
 826                 struct vnode *a_vp;
 827                 struct thread *a_td;
 828         } */ *ap;
 829 {
 830         vnode_t *vp = ap->a_vp;
 831         zfsctl_snapdir_t *sdp = vp->v_data;
 832         void *private;
 833
 834         private = gfs_dir_inactive(vp);
 835         if (private != NULL) {
 836                 ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
 837                 mutex_destroy(&sdp->sd_lock);
 838                 avl_destroy(&sdp->sd_snaps);
 839                 kmem_free(private, sizeof (zfsctl_snapdir_t));
 840         }
 841         return (0);
 842 }
 843
 844 static struct vop_vector zfsctl_ops_snapdir = {
 845         .vop_default =  &default_vnodeops,
 846         .vop_open =     zfsctl_common_open,
 847         .vop_close =    zfsctl_common_close,
 848         .vop_ioctl =    VOP_EINVAL,
 849         .vop_getattr =  zfsctl_snapdir_getattr,
 850         .vop_access =   zfsctl_common_access,
 851         .vop_readdir =  gfs_vop_readdir,
 852         .vop_lookup =   zfsctl_snapdir_lookup,
 853         .vop_inactive = zfsctl_snapdir_inactive,
 854         .vop_reclaim =  zfsctl_common_reclaim,
 855         .vop_fid =      zfsctl_common_fid,
 856 };
 857
 858 static vnode_t *
 859 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
 860 {
 861         vnode_t *vp;
 862         zfsctl_node_t *zcp;
 863
 864         vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
 865             &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
 866         zcp = vp->v_data;
 867         zcp->zc_id = objset;
 868         VOP_UNLOCK(vp, 0, curthread);
 869
 870         return (vp);
 871 }
 872
 873 static int
 874 zfsctl_snapshot_inactive(ap)
 875         struct vop_inactive_args /* {
 876                 struct vnode *a_vp;
 877                 struct thread *a_td;
 878         } */ *ap;
 879 {
 880         vnode_t *vp = ap->a_vp;
 881         struct vop_inactive_args iap;
 882         zfsctl_snapdir_t *sdp;
 883         zfs_snapentry_t *sep, *next;
 884         int locked;
 885         vnode_t *dvp;
 886
 887         VERIFY(gfs_dir_lookup(vp, "..", &dvp) == 0);
 888         sdp = dvp->v_data;
 889         VOP_UNLOCK(dvp, 0, ap->a_td);
 890
 891         if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
 892                 mutex_enter(&sdp->sd_lock);
 893
 894         if (vp->v_count > 1) {
 895                 if (!locked)
 896                         mutex_exit(&sdp->sd_lock);
 897                 return (0);
 898         }
 899         ASSERT(!vn_ismntpt(vp));
 900
 901         sep = avl_first(&sdp->sd_snaps);
 902         while (sep != NULL) {
 903                 next = AVL_NEXT(&sdp->sd_snaps, sep);
 904
 905                 if (sep->se_root == vp) {
 906                         avl_remove(&sdp->sd_snaps, sep);
 907                         kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 908                         kmem_free(sep, sizeof (zfs_snapentry_t));
 909                         break;
 910                 }
 911                 sep = next;
 912         }
 913         ASSERT(sep != NULL);
 914
 915         if (!locked)
 916                 mutex_exit(&sdp->sd_lock);
 917         VN_RELE(dvp);
 918
 919         /*
 920          * Dispose of the vnode for the snapshot mount point.
 921          * This is safe to do because once this entry has been removed
 922          * from the AVL tree, it can't be found again, so cannot become
 923          * "active".  If we lookup the same name again we will end up
 924          * creating a new vnode.
 925          */
 926         iap.a_vp = vp;
 927         return (gfs_vop_inactive(&iap));
 928 }
 929
 930 static int
 931 zfsctl_traverse_begin(vnode_t **vpp, int lktype, kthread_t *td)
 932 {
 933
 934         VN_HOLD(*vpp);
 935         /* Snapshot should be already mounted, but just in case. */
 936         if (vn_mountedvfs(*vpp) == NULL)
 937                 return (ENOENT);
 938         return (traverse(vpp, lktype));
 939 }
 940
 941 static void
 942 zfsctl_traverse_end(vnode_t *vp, int err)
 943 {
 944
 945         if (err == 0)
 946                 vput(vp);
 947         else
 948                 VN_RELE(vp);
 949 }
 950
 951 static int
 952 zfsctl_snapshot_getattr(ap)
 953         struct vop_getattr_args /* {
 954                 struct vnode *a_vp;
 955                 struct vattr *a_vap;
 956                 struct ucred *a_cred;
 957                 struct thread *a_td;
 958         } */ *ap;
 959 {
 960         vnode_t *vp = ap->a_vp;
 961         int err;
 962
 963         err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY, ap->a_td);
 964         if (err == 0)
 965                 err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred, ap->a_td);
 966         zfsctl_traverse_end(vp, err);
 967         return (err);
 968 }
 969
 970 static int
 971 zfsctl_snapshot_fid(ap)
 972         struct vop_fid_args /* {
 973                 struct vnode *a_vp;
 974                 struct fid *a_fid;
 975         } */ *ap;
 976 {
 977         vnode_t *vp = ap->a_vp;
 978         int err;
 979
 980         err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY, curthread);
 981         if (err == 0)
 982                 err = VOP_VPTOFH(vp, (void *)ap->a_fid);
 983         zfsctl_traverse_end(vp, err);
 984         return (err);
 985 }
 986
 987 /*
 988  * These VP's should never see the light of day.  They should always
 989  * be covered.
 990  */
 991 static struct vop_vector zfsctl_ops_snapshot = {
 992         .vop_default =  &default_vnodeops,
 993         .vop_inactive = zfsctl_snapshot_inactive,
 994         .vop_reclaim =  zfsctl_common_reclaim,
 995         .vop_getattr =  zfsctl_snapshot_getattr,
 996         .vop_fid =      zfsctl_snapshot_fid,
 997 };
 998
 999 int
1000 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1001 {
1002         zfsvfs_t *zfsvfs = vfsp->vfs_data;
1003         vnode_t *dvp, *vp;
1004         zfsctl_snapdir_t *sdp;
1005         zfsctl_node_t *zcp;
1006         zfs_snapentry_t *sep;
1007         int error;
1008
1009         ASSERT(zfsvfs->z_ctldir != NULL);
1010         error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1011             NULL, 0, NULL, kcred);
1012         if (error != 0)
1013                 return (error);
1014         sdp = dvp->v_data;
1015
1016         mutex_enter(&sdp->sd_lock);
1017         sep = avl_first(&sdp->sd_snaps);
1018         while (sep != NULL) {
1019                 vp = sep->se_root;
1020                 zcp = vp->v_data;
1021                 if (zcp->zc_id == objsetid)
1022                         break;
1023
1024                 sep = AVL_NEXT(&sdp->sd_snaps, sep);
1025         }
1026
1027         if (sep != NULL) {
1028                 VN_HOLD(vp);
1029                 error = traverse(&vp, LK_SHARED | LK_RETRY);
1030                 if (error == 0) {
1031                         if (vp == sep->se_root)
1032                                 error = EINVAL;
1033                         else
1034                                 *zfsvfsp = VTOZ(vp)->z_zfsvfs;
1035                 }
1036                 mutex_exit(&sdp->sd_lock);
1037                 if (error == 0)
1038                         VN_URELE(vp);
1039                 else
1040                         VN_RELE(vp);
1041         } else {
1042                 error = EINVAL;
1043                 mutex_exit(&sdp->sd_lock);
1044         }
1045
1046         VN_RELE(dvp);
1047
1048         return (error);
1049 }
1050
1051 /*
1052  * Unmount any snapshots for the given filesystem.  This is called from
1053  * zfs_umount() - if we have a ctldir, then go through and unmount all the
1054  * snapshots.
1055  */
1056 int
1057 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1058 {
1059         struct vop_inactive_args ap;
1060         zfsvfs_t *zfsvfs = vfsp->vfs_data;
1061         vnode_t *dvp, *svp;
1062         zfsctl_snapdir_t *sdp;
1063         zfs_snapentry_t *sep, *next;
1064         int error;
1065
1066         ASSERT(zfsvfs->z_ctldir != NULL);
1067         error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1068             NULL, 0, NULL, cr);
1069         if (error != 0)
1070                 return (error);
1071         sdp = dvp->v_data;
1072
1073         mutex_enter(&sdp->sd_lock);
1074
1075         sep = avl_first(&sdp->sd_snaps);
1076         while (sep != NULL) {
1077                 svp = sep->se_root;
1078                 next = AVL_NEXT(&sdp->sd_snaps, sep);
1079
1080                 /*
1081                  * If this snapshot is not mounted, then it must
1082                  * have just been unmounted by somebody else, and
1083                  * will be cleaned up by zfsctl_snapdir_inactive().
1084                  */
1085                 if (vn_ismntpt(svp)) {
1086                         if ((error = vn_vfswlock(svp)) != 0)
1087                                 goto out;
1088
1089                         /*
1090                          * Increase usecount, so dounmount() won't vrele() it
1091                          * to 0 and call zfsctl_snapdir_inactive().
1092                          */
1093                         VN_HOLD(svp);
1094                         vfsp = vn_mountedvfs(svp);
1095                         mtx_lock(&Giant);
1096                         error = dounmount(vfsp, fflags, curthread);
1097                         mtx_unlock(&Giant);
1098                         if (error != 0) {
1099                                 VN_RELE(svp);
1100                                 goto out;
1101                         }
1102
1103                         avl_remove(&sdp->sd_snaps, sep);
1104                         kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1105                         kmem_free(sep, sizeof (zfs_snapentry_t));
1106
1107                         /*
1108                          * We can't use VN_RELE(), as that will try to
1109                          * invoke zfsctl_snapdir_inactive(), and that
1110                          * would lead to an attempt to re-grab the sd_lock.
1111                          */
1112                         ASSERT3U(svp->v_count, ==, 1);
1113                         ap.a_vp = svp;
1114                         gfs_vop_inactive(&ap);
1115                 }
1116                 sep = next;
1117         }
1118 out:
1119         mutex_exit(&sdp->sd_lock);
1120         VN_RELE(dvp);
1121
1122         return (error);
1123 }