4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
26 * ZFS control directory (a.k.a. ".zfs")
28 * This directory provides a common location for all ZFS meta-objects.
29 * Currently, this is only the 'snapshot' directory, but this may expand in the
30 * future. The elements are built using the GFS primitives, as the hierarchy
31 * does not actually exist on disk.
33 * For 'snapshot', we don't want to have all snapshots always mounted, because
34 * this would take up a huge amount of space in /etc/mnttab. We have three types of objects:
37 * ctldir ------> snapshotdir -------> snapshot
43 * The 'snapshot' node contains just enough information to lookup '..' and act
44 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
45 * perform an automount of the underlying filesystem and return the
46 * corresponding vnode.
48 * All mounts are handled automatically by the kernel, but unmounts are
49 * (currently) handled from user land. The main reason is that there is no
50 * reliable way to auto-unmount the filesystem when it's "no longer in use".
51 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
52 * unmounts any snapshots within the snapshot directory.
54 * The '.zfs', '.zfs/snapshot', and all directories created under
55 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
56 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
58 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
59 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
60 * However, vnodes within these mounted on file systems have their v_vfsp
61 * fields set to the head filesystem to make NFS happy (see
62 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
63 * so that it cannot be freed until all snapshots have been unmounted.
66 #include <sys/zfs_context.h>
67 #include <sys/zfs_ctldir.h>
68 #include <sys/zfs_ioctl.h>
69 #include <sys/zfs_vfsops.h>
70 #include <sys/namei.h>
74 #include <sys/dsl_deleg.h>
75 #include <sys/mount.h>
76 #include <sys/sunddi.h>
78 #include "zfs_namecheck.h"
/*
 * Private data attached (through v_data) to the plain '.zfs' GFS nodes.
 * Besides the members shown here, code in this file also accesses a zc_id
 * member holding the node's id; its declaration is not visible in this
 * excerpt.
 */
80 typedef struct zfsctl_node {
81 gfs_dir_t zc_gfs_private;
83 timestruc_t zc_cmtime; /* ctime and mtime, always the same */
/*
 * Private data for the '.zfs/snapshot' directory node.  It embeds a
 * zfsctl_node_t as sd_node; the sd_lock mutex and the sd_snaps AVL tree used
 * throughout this file are declared on lines not visible in this excerpt.
 */
86 typedef struct zfsctl_snapdir {
87 zfsctl_node_t sd_node;
/*
 * AVL comparison callback for the sd_snaps tree (installed by avl_create()
 * in zfsctl_mknode_snapdir()).  Entries are ordered by strcmp() of their
 * se_name strings.  How 'ret' is turned into the returned value is not
 * visible in this excerpt.
 */
99 snapentry_compare(const void *a, const void *b)
101 const zfs_snapentry_t *sa = a;
102 const zfs_snapentry_t *sb = b;
103 int ret = strcmp(sa->se_name, sb->se_name);
/*
 * Vnode operation vectors, operation templates and forward declarations used
 * by the .zfs nodes.
 * NOTE(review): the same zfsctl_ops_* names appear below both as vnodeops_t
 * pointers and as static struct vop_vector objects; presumably the two sets
 * are selected by preprocessor conditionals that are not visible in this
 * excerpt -- confirm before touching either set.
 */
114 vnodeops_t *zfsctl_ops_root;
115 vnodeops_t *zfsctl_ops_snapdir;
116 vnodeops_t *zfsctl_ops_snapshot;
117 vnodeops_t *zfsctl_ops_shares;
118 vnodeops_t *zfsctl_ops_shares_dir;
120 static const fs_operation_def_t zfsctl_tops_root[];
121 static const fs_operation_def_t zfsctl_tops_snapdir[];
122 static const fs_operation_def_t zfsctl_tops_snapshot[];
123 static const fs_operation_def_t zfsctl_tops_shares[];
125 static struct vop_vector zfsctl_ops_root;
126 static struct vop_vector zfsctl_ops_snapdir;
127 static struct vop_vector zfsctl_ops_snapshot;
128 static struct vop_vector zfsctl_ops_shares;
129 static struct vop_vector zfsctl_ops_shares_dir;
132 static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
133 static vnode_t *zfsctl_mknode_shares(vnode_t *);
134 static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
135 static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
/*
 * Table handed to gfs_make_opsvec(): each row pairs a node name with its
 * operation template and the ops vector to fill in.  Note that the
 * ".zfs/shares" row fills zfsctl_ops_shares_dir while ".zfs/shares/vnode"
 * fills zfsctl_ops_shares; both rows use the zfsctl_tops_shares template.
 */
138 static gfs_opsvec_t zfsctl_opsvec[] = {
139 { ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
140 { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
141 { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
142 { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
143 { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
149 * Root directory elements. We only have two entries
150 * snapshot and shares.
/*
 * Static entries of the '.zfs' directory, passed to gfs_root_create() by
 * zfsctl_create().  Each row gives the entry name, the callback that creates
 * its vnode, and the GFS_CACHE_VNODE flag.
 */
152 static gfs_dirent_t zfsctl_root_entries[] = {
153 { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
154 { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
/*
 * Entry count reported for '.zfs' (zfsctl_root_getattr() uses it for both
 * va_nlink and va_size): the element count of zfsctl_root_entries plus one.
 * NOTE(review): for the "+ 1" to cover both '.' and '..', the array must
 * contain one element that is not a real entry (e.g. a terminator); that
 * element is not visible in this excerpt -- confirm.
 */
158 /* include . and .. in the calculation */
159 #define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
160 sizeof (gfs_dirent_t)) + 1)
164 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
165 * directories. This is called from the ZFS init routine, and initializes the
166 * vnode ops vectors that we'll be using.
/* Build the ops vectors listed in zfsctl_opsvec; any failure is fatal. */
172 VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
181 * Remove zfsctl vnode ops
/*
 * Release the ops vectors with vn_freevnodeops() and reset all five global
 * pointers to NULL.  The snapdir, snapshot, shares and shares_dir vectors
 * are freed only when non-NULL; whether the zfsctl_ops_root call is guarded
 * the same way is not visible in this excerpt.
 */
184 vn_freevnodeops(zfsctl_ops_root);
185 if (zfsctl_ops_snapdir)
186 vn_freevnodeops(zfsctl_ops_snapdir);
187 if (zfsctl_ops_snapshot)
188 vn_freevnodeops(zfsctl_ops_snapshot);
189 if (zfsctl_ops_shares)
190 vn_freevnodeops(zfsctl_ops_shares);
191 if (zfsctl_ops_shares_dir)
192 vn_freevnodeops(zfsctl_ops_shares_dir);
194 zfsctl_ops_root = NULL;
195 zfsctl_ops_snapdir = NULL;
196 zfsctl_ops_snapshot = NULL;
197 zfsctl_ops_shares = NULL;
198 zfsctl_ops_shares_dir = NULL;
/*
 * Report whether 'vp' is one of the .zfs control nodes: the result is true
 * when vn_matchops() matches its ops against any of the five zfsctl ops
 * vectors (root, snapdir, snapshot, shares, shares_dir).
 */
203 zfsctl_is_node(vnode_t *vp)
205 return (vn_matchops(vp, zfsctl_ops_root) ||
206 vn_matchops(vp, zfsctl_ops_snapdir) ||
207 vn_matchops(vp, zfsctl_ops_snapshot) ||
208 vn_matchops(vp, zfsctl_ops_shares) ||
209 vn_matchops(vp, zfsctl_ops_shares_dir));
214 * Return the inode number associated with the 'snapshot' or
215 * 'shares' directory.
/*
 * Inode callback given to gfs_root_create() for the '.zfs' entries.  Returns
 * either ZFSCTL_INO_SNAPDIR or the filesystem's z_shares_dir object number;
 * the test on 'index' that selects between them is not visible in this
 * excerpt.
 */
219 zfsctl_root_inode_cb(vnode_t *vp, int index)
221 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
226 return (ZFSCTL_INO_SNAPDIR);
228 return (zfsvfs->z_shares_dir);
232 * Create the '.zfs' directory. This directory is cached as part of the VFS
233 * structure. This results in a hold on the vfs_t. The code in zfs_umount()
234 * therefore checks against a vfs_count of 2 instead of 1. This reference
235 * is removed when the ctldir is destroyed in the unmount.
/*
 * zfsvfs: filesystem that is getting its '.zfs' node; z_ctldir must be NULL
 * on entry.
 *
 * Creates the GFS root vnode from the zfsctl_root_entries table, tags it with
 * ZFSCTL_INO_ROOT, and sets its ctime/mtime from the SA_ZPL_CRTIME attribute
 * of the filesystem root znode (obtained with VFS_ROOT()).  VV_ROOT is then
 * cleared on the new vnode and the vnode is stored in zfsvfs->z_ctldir.
 * Failures of VFS_ROOT() or sa_lookup() are fatal (VERIFY).
 */
238 zfsctl_create(zfsvfs_t *zfsvfs)
244 ASSERT(zfsvfs->z_ctldir == NULL);
246 vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
247 &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
248 zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
250 zcp->zc_id = ZFSCTL_INO_ROOT;
252 VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
253 VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
254 &crtime, sizeof (crtime)));
255 ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
259 * We're only faking the fact that we have a root of a filesystem for
260 * the sake of the GFS interfaces. Undo the flag manipulation it did
263 vp->v_vflag &= ~VV_ROOT;
265 zfsvfs->z_ctldir = vp;
271 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
272 * There might still be more references if we were force unmounted, but only
273 * new zfs_inactive() calls can occur and they don't reference .zfs
/* Release the hold on the cached '.zfs' vnode and clear zfsvfs->z_ctldir. */
276 zfsctl_destroy(zfsvfs_t *zfsvfs)
278 VN_RELE(zfsvfs->z_ctldir);
279 zfsvfs->z_ctldir = NULL;
283 * Given a root znode, retrieve the associated .zfs directory.
284 * Add a hold to the vnode and return it.
/*
 * zp: a znode for which zfs_has_ctldir() holds (asserted).
 * Returns the z_ctldir vnode of zp's filesystem after taking an additional
 * hold on it with VN_HOLD().
 */
287 zfsctl_root(znode_t *zp)
289 ASSERT(zfs_has_ctldir(zp));
290 VN_HOLD(zp->z_zfsvfs->z_ctldir);
291 return (zp->z_zfsvfs->z_ctldir);
295 * Common open routine. Disallow any write access.
/*
 * VOP_OPEN handler shared by the .zfs nodes.  The open flags are taken from
 * ap->a_mode; the write-access check and the return statements are not
 * visible in this excerpt.
 */
299 zfsctl_common_open(struct vop_open_args *ap)
301 int flags = ap->a_mode;
310 * Common close routine. Nothing to do here.
/*
 * VOP_CLOSE handler shared by the .zfs nodes; its body is not visible in
 * this excerpt.
 */
314 zfsctl_common_close(struct vop_close_args *ap)
320 * Common access routine. Disallow writes.
/*
 * VOP_ACCESS handler shared by the .zfs nodes.  The requested mode comes from
 * ap->a_accmode and is tested against ACE_ALL_WRITE_PERMS (inside a check for
 * V_ACE_MASK in 'flags') and against VWRITE.  The declaration of 'flags' and
 * the values returned are not visible in this excerpt.
 */
324 zfsctl_common_access(ap)
325 struct vop_access_args /* {
328 struct ucred *a_cred;
332 accmode_t accmode = ap->a_accmode;
335 if (flags & V_ACE_MASK) {
336 if (accmode & ACE_ALL_WRITE_PERMS)
340 if (accmode & VWRITE)
350 * Common getattr function. Fill in basic information.
/*
 * Fill the attributes common to every .zfs node into 'vap'.  Visible here:
 * va_fsid is taken from the first word of the mount's f_fsid, and va_mode is
 * built from read/execute permission bits (the expression continues on a
 * line not visible in this excerpt).
 */
353 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
361 * We are a purely virtual object, so we have no
362 * blocksize or allocated blocks.
367 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
368 vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
372 * We live in the now (for atime).
376 /* FreeBSD: Reset chflags(2) flags. */
/*
 * VOP_FID handler for .zfs nodes: builds a short fid (SHORT_FID_LEN) from the
 * node's zc_id.  The id is stored one byte at a time into zf_object, least
 * significant byte first; the generation bytes are filled by the final loop,
 * whose body is not visible in this excerpt.
 */
382 zfsctl_common_fid(ap)
383 struct vop_fid_args /* {
388 vnode_t *vp = ap->a_vp;
389 fid_t *fidp = (void *)ap->a_fid;
390 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
391 zfsctl_node_t *zcp = vp->v_data;
392 uint64_t object = zcp->zc_id;
398 fidp->fid_len = SHORT_FID_LEN;
400 zfid = (zfid_short_t *)fidp;
402 zfid->zf_len = SHORT_FID_LEN;
404 for (i = 0; i < sizeof (zfid->zf_object); i++)
405 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
407 /* .zfs znodes always have a generation number of 0 */
408 for (i = 0; i < sizeof (zfid->zf_gen); i++)
/*
 * VOP_FID handler for '.zfs/shares'.  The znode of the filesystem's shares
 * directory (z_shares_dir) is obtained with zfs_zget() and the request is
 * forwarded to its vnode with VOP_FID().  The z_shares_dir == 0 case is
 * handled separately; that branch and the cleanup after the forwarded call
 * are not visible in this excerpt.
 */
418 zfsctl_shares_fid(ap)
419 struct vop_fid_args /* {
424 vnode_t *vp = ap->a_vp;
425 fid_t *fidp = (void *)ap->a_fid;
426 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
432 if (zfsvfs->z_shares_dir == 0) {
437 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
438 error = VOP_FID(ZTOV(dzp), fidp);
/*
 * VOP_RECLAIM handler used by all of the .zfs vop_vector tables in this
 * file: calls vnode_destroy_vobject() on the vnode.  Any further teardown is
 * on lines not visible in this excerpt.
 */
447 zfsctl_common_reclaim(ap)
448 struct vop_reclaim_args /* {
453 vnode_t *vp = ap->a_vp;
456 * Destroy the vm object and flush associated pages.
458 vnode_destroy_vobject(vp);
466 * .zfs inode namespace
468 * We need to generate unique inode numbers for all files and directories
469 * within the .zfs pseudo-filesystem. We use the following scheme:
474 * .zfs/snapshot/<snap> objectid(snap)
/* Inode number of '.zfs/snapshot/<snap>': the id passed in, unchanged. */
477 #define ZFSCTL_INO_SNAP(id) (id)
480 * Get root directory attributes.
/*
 * VOP_GETATTR handler for '.zfs'.  Sets va_nodeid to ZFSCTL_INO_ROOT,
 * va_nlink and va_size to NROOT_ENTRIES, and mtime, ctime and birthtime all
 * to the node's zc_cmtime, then calls zfsctl_common_getattr() for the
 * attributes shared by every .zfs node.
 */
484 zfsctl_root_getattr(ap)
485 struct vop_getattr_args /* {
488 struct ucred *a_cred;
491 struct vnode *vp = ap->a_vp;
492 struct vattr *vap = ap->a_vap;
493 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
494 zfsctl_node_t *zcp = vp->v_data;
497 vap->va_nodeid = ZFSCTL_INO_ROOT;
498 vap->va_nlink = vap->va_size = NROOT_ENTRIES;
499 vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
500 vap->va_birthtime = vap->va_ctime;
502 zfsctl_common_getattr(vp, vap);
509 * Special case the handling of "..".
/*
 * Look up 'nm' in the '.zfs' directory 'dvp', returning the result in *vpp.
 * LOOKUP_XATTR requests get special treatment (the error returned is not
 * visible here).  ".." is resolved with VFS_ROOT() on the containing
 * filesystem, taking an exclusive lock; the other visible path forwards to
 * gfs_vop_lookup().  Further branches may exist on lines not visible in this
 * excerpt.
 */
513 zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
514 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
515 int *direntflags, pathname_t *realpnp)
517 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
521 * No extended attributes allowed under .zfs
523 if (flags & LOOKUP_XATTR)
528 if (strcmp(nm, "..") == 0) {
529 err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp);
533 err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
534 cr, ct, direntflags, realpnp);
/*
 * VOP_PATHCONF handler.  For _PC_ACL_ENABLED, *valp is set to
 * _ACL_ACE_ENABLED (the return that follows is not visible in this excerpt);
 * the visible fallback for other 'cmd' values is fs_pathconf().
 */
544 zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
545 caller_context_t *ct)
548 * We only care about ACL_ENABLED so that libsec can
549 * display ACL correctly and not default to POSIX draft.
551 if (cmd == _PC_ACL_ENABLED) {
552 *valp = _ACL_ACE_ENABLED;
556 return (fs_pathconf(vp, cmd, valp, cr, ct));
/*
 * Operation template for '.zfs', referenced from zfsctl_opsvec.  Open, close,
 * access and fid use the zfsctl_common_* handlers; readdir and inactive use
 * the generic GFS handlers; ioctl maps to fs_inval.
 */
561 static const fs_operation_def_t zfsctl_tops_root[] = {
562 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
563 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
564 { VOPNAME_IOCTL, { .error = fs_inval } },
565 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_root_getattr } },
566 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
567 { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
568 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_root_lookup } },
569 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
570 { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
571 { VOPNAME_PATHCONF, { .vop_pathconf = zfsctl_pathconf } },
572 { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
578 * Special case the handling of "..".
/*
 * FreeBSD VOP_LOOKUP entry for '.zfs'.  Copies the component name from
 * ap->a_cnp into a NUL-terminated local buffer (the name is asserted to fit)
 * and calls zfsctl_root_lookup().  On success the resulting vnode is locked
 * exclusively unless the name was ".".  The action taken for RENAME/CREATE on
 * the last path component is not visible in this excerpt.
 */
582 zfsctl_freebsd_root_lookup(ap)
583 struct vop_lookup_args /* {
585 struct vnode **a_vpp;
586 struct componentname *a_cnp;
589 vnode_t *dvp = ap->a_dvp;
590 vnode_t **vpp = ap->a_vpp;
591 cred_t *cr = ap->a_cnp->cn_cred;
592 int flags = ap->a_cnp->cn_flags;
593 int nameiop = ap->a_cnp->cn_nameiop;
594 char nm[NAME_MAX + 1];
597 if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
600 ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
601 strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
603 err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);
604 if (err == 0 && (nm[0] != '.' || nm[1] != '\0'))
605 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
/*
 * vop_vector for the '.zfs' vnode.  Lookup goes through the FreeBSD wrapper
 * zfsctl_freebsd_root_lookup(); ioctl is VOP_EINVAL; default_vnodeops is
 * named as the vop_default vector.
 */
609 static struct vop_vector zfsctl_ops_root = {
610 .vop_default = &default_vnodeops,
611 .vop_open = zfsctl_common_open,
612 .vop_close = zfsctl_common_close,
613 .vop_ioctl = VOP_EINVAL,
614 .vop_getattr = zfsctl_root_getattr,
615 .vop_access = zfsctl_common_access,
616 .vop_readdir = gfs_vop_readdir,
617 .vop_lookup = zfsctl_freebsd_root_lookup,
618 .vop_inactive = gfs_vop_inactive,
619 .vop_reclaim = zfsctl_common_reclaim,
621 .vop_pathconf = zfsctl_pathconf,
623 .vop_fid = zfsctl_common_fid,
/*
 * Build the full snapshot name "<dataset>@<name>" in 'zname' (capacity 'len'
 * bytes), where <dataset> is the objset name of vp's filesystem.  'name' is
 * first validated with snapshot_namecheck().  ENAMETOOLONG is returned when
 * dataset + "@" + name + terminating NUL would not fit in 'len' bytes.  The
 * other return values are on lines not visible in this excerpt.
 */
627 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
629 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
631 if (snapshot_namecheck(name, NULL, NULL) != 0)
633 dmu_objset_name(os, zname);
634 if (strlen(zname) + 1 + strlen(name) >= len)
635 return (ENAMETOOLONG);
636 (void) strcat(zname, "@");
637 (void) strcat(zname, name);
/*
 * Unmount the snapshot mounted on sep->se_root, which is asserted to be a
 * mount point.
 * NOTE(review): two different dounmount() call forms appear below (one
 * passing 'cr', one passing 'curthread'); presumably they belong to
 * alternative preprocessor branches that are not visible in this excerpt.
 * The first form is followed by code that releases the mount-point vnode
 * through gfs_vop_inactive() and frees the entry's name and the entry itself.
 */
642 zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
644 vnode_t *svp = sep->se_root;
647 ASSERT(vn_ismntpt(svp));
649 /* this will be dropped by dounmount() */
650 if ((error = vn_vfswlock(svp)) != 0)
655 error = dounmount(vn_mountedvfs(svp), fflags, cr);
662 * We can't use VN_RELE(), as that will try to invoke
663 * zfsctl_snapdir_inactive(), which would cause us to destroy
664 * the sd_lock mutex held by our caller.
666 ASSERT(svp->v_count == 1);
667 gfs_vop_inactive(svp, cr, NULL);
669 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
670 kmem_free(sep, sizeof (zfs_snapentry_t));
674 return (dounmount(vn_mountedvfs(svp), fflags, curthread));
/*
 * Rename AVL entry 'sep' of snapshot directory 'sdp' to 'nm'; the caller must
 * hold sdp->sd_lock (asserted).  The entry is removed from sd_snaps, given a
 * newly allocated copy of 'nm' and re-inserted (the new name must not
 * already be present).  The mount point and resource strings of the mounted
 * vfs are then rebuilt around the new name and installed with
 * vfs_setmntpoint() / vfs_setresource().
 * NOTE(review): strncpy() with sizeof (newpath) leaves the buffer without a
 * terminating NUL when the source fills it; the truncation after 'tail'
 * happens on lines not visible in this excerpt -- confirm the buffer is
 * always terminated before strrchr()/strcat() run.
 */
680 zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
685 char newpath[MAXNAMELEN];
688 ASSERT(MUTEX_HELD(&sdp->sd_lock));
691 vfsp = vn_mountedvfs(sep->se_root);
692 ASSERT(vfsp != NULL);
697 * Change the name in the AVL tree.
699 avl_remove(&sdp->sd_snaps, sep);
700 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
701 sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
702 (void) strcpy(sep->se_name, nm);
703 VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
704 avl_insert(&sdp->sd_snaps, sep, where);
707 * Change the current mountpoint info:
708 * - update the tail of the mntpoint path
709 * - update the tail of the resource path
711 pathref = vfs_getmntpoint(vfsp);
712 (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
713 VERIFY((tail = strrchr(newpath, '/')) != NULL);
715 ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
716 (void) strcat(newpath, nm);
717 refstr_rele(pathref);
718 vfs_setmntpoint(vfsp, newpath, 0);
720 pathref = vfs_getresource(vfsp);
721 (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
722 VERIFY((tail = strrchr(newpath, '@')) != NULL);
724 ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
725 (void) strcat(newpath, nm);
726 refstr_rele(pathref);
727 vfs_setresource(vfsp, newpath, 0);
/*
 * Rename snapshot 'snm' to 'tnm' through '.zfs/snapshot'.  For
 * case-insensitive requests the source name is first resolved with
 * dmu_snapshot_realname().  Both full names are built with
 * zfsctl_snapshot_zname() and checked with zfs_secpolicy_rename_perms().
 * Under sd_lock the source is searched in sd_snaps; the visible code then
 * calls dmu_objset_rename() and zfsctl_rename_snap().  The error paths, the
 * conditions between these steps and the return values are on lines not
 * visible in this excerpt.
 */
736 zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
737 cred_t *cr, caller_context_t *ct, int flags)
739 zfsctl_snapdir_t *sdp = sdvp->v_data;
740 zfs_snapentry_t search, *sep;
743 char from[MAXNAMELEN], to[MAXNAMELEN];
744 char real[MAXNAMELEN];
747 zfsvfs = sdvp->v_vfsp->vfs_data;
750 if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
751 err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
755 } else if (err != ENOTSUP) {
763 err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
765 err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
767 err = zfs_secpolicy_rename_perms(from, to, cr);
772 * Cannot move snapshots out of the snapdir.
777 if (strcmp(snm, tnm) == 0)
780 mutex_enter(&sdp->sd_lock);
782 search.se_name = (char *)snm;
783 if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
784 mutex_exit(&sdp->sd_lock);
788 err = dmu_objset_rename(from, to, B_FALSE);
790 zfsctl_rename_snap(sdp, sep, tnm);
792 mutex_exit(&sdp->sd_lock);
/*
 * Destroy snapshot 'name' through '.zfs/snapshot'.  After the optional
 * case-insensitive name resolution and the zfs_secpolicy_destroy_perms()
 * check, the entry is looked up in sd_snaps under sd_lock.  The visible code
 * removes it from the tree and force-unmounts it (MS_FORCE), re-inserts the
 * entry if it is absent from the tree, and calls dmu_objset_destroy(); the
 * conditions guarding those steps are on lines not visible in this excerpt.
 */
801 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
802 caller_context_t *ct, int flags)
804 zfsctl_snapdir_t *sdp = dvp->v_data;
805 zfs_snapentry_t *sep;
806 zfs_snapentry_t search;
808 char snapname[MAXNAMELEN];
809 char real[MAXNAMELEN];
812 zfsvfs = dvp->v_vfsp->vfs_data;
815 if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
817 err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
821 } else if (err != ENOTSUP) {
829 err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
831 err = zfs_secpolicy_destroy_perms(snapname, cr);
835 mutex_enter(&sdp->sd_lock);
837 search.se_name = name;
838 sep = avl_find(&sdp->sd_snaps, &search, NULL);
840 avl_remove(&sdp->sd_snaps, sep);
841 err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
845 if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)
846 avl_insert(&sdp->sd_snaps, sep, where);
848 err = dmu_objset_destroy(snapname, B_FALSE);
853 mutex_exit(&sdp->sd_lock);
860 * This creates a snapshot under '.zfs/snapshot'.
/*
 * Create snapshot 'dirname' of dvp's dataset.  The name is validated with
 * snapshot_namecheck(), permission is checked with
 * zfs_secpolicy_snapshot_perms() on the dataset name, and the snapshot is
 * taken with dmu_objset_snapshot().  lookupnameat() then looks the new name
 * up relative to 'dvp' to produce *vpp.  Error returns are on lines not
 * visible in this excerpt.
 */
864 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
865 cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
867 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
868 char name[MAXNAMELEN];
870 static enum symfollow follow = NO_FOLLOW;
871 static enum uio_seg seg = UIO_SYSSPACE;
873 if (snapshot_namecheck(dirname, NULL, NULL) != 0)
876 dmu_objset_name(zfsvfs->z_os, name);
880 err = zfs_secpolicy_snapshot_perms(name, cr);
885 err = dmu_objset_snapshot(name, dirname, NULL, NULL,
886 B_FALSE, B_FALSE, -1);
889 err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
/*
 * FreeBSD VOP_MKDIR entry for '.zfs/snapshot'.  Asserts that SAVENAME is set
 * on the component name and forwards the directory vnode, name, result
 * pointer and credentials to zfsctl_snapdir_mkdir(); the remaining arguments
 * are passed as NULL or 0.
 */
896 zfsctl_freebsd_snapdir_mkdir(ap)
897 struct vop_mkdir_args /* {
899 struct vnode **a_vpp;
900 struct componentname *a_cnp;
905 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
907 return (zfsctl_snapdir_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, NULL,
908 ap->a_vpp, ap->a_cnp->cn_cred, NULL, 0, NULL));
912 * Lookup entry point for the 'snapshot' directory. Try to open the
913 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
914 * Perform a mount of the associated dataset on top of the vnode.
/*
 * VOP_LOOKUP handler for '.zfs/snapshot'.  Outline of the visible code:
 *  - copy the component name into a local NUL-terminated buffer;
 *  - handle "." and ".." through gfs_lookup_dot();
 *  - with FIGNORECASE, resolve the real name via dmu_snapshot_realname();
 *  - under sd_lock, search sd_snaps; for a known entry, traverse() to the
 *    mounted root and clear VROOT on it;
 *  - otherwise build the full snapshot name with zfsctl_snapshot_zname(),
 *    hold the objset with dmu_objset_hold(), create the mount-point node,
 *    insert a new entry into sd_snaps and mount the snapshot on
 *    "<mntonname>/.zfs-dir/snapshot/<name>" with mount_snapshot();
 *  - set z_parent of the mounted filesystem to this zfsvfs.
 * The declarations of several locals, most error handling and the return
 * statements are on lines not visible in this excerpt.
 */
918 zfsctl_snapdir_lookup(ap)
919 struct vop_lookup_args /* {
921 struct vnode **a_vpp;
922 struct componentname *a_cnp;
925 vnode_t *dvp = ap->a_dvp;
926 vnode_t **vpp = ap->a_vpp;
927 struct componentname *cnp = ap->a_cnp;
928 char nm[NAME_MAX + 1];
929 zfsctl_snapdir_t *sdp = dvp->v_data;
931 char snapname[MAXNAMELEN];
932 char real[MAXNAMELEN];
934 zfs_snapentry_t *sep, search;
935 size_t mountpoint_len;
937 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
942 * No extended attributes allowed under .zfs
944 if (flags & LOOKUP_XATTR)
946 ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
947 strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
949 ASSERT(dvp->v_type == VDIR);
954 * If we get a recursive call, that means we got called
955 * from the domount() code while it was trying to look up the
956 * spec (which looks like a local path for zfs). We need to
957 * add some flag to domount() to tell it not to do this lookup.
959 if (MUTEX_HELD(&sdp->sd_lock))
964 if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
969 if (flags & FIGNORECASE) {
970 boolean_t conflict = B_FALSE;
972 err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
973 MAXNAMELEN, &conflict);
975 strlcpy(nm, real, sizeof(nm));
976 } else if (err != ENOTSUP) {
982 (void) strlcpy(realpnp->pn_buf, nm,
983 realpnp->pn_bufsize);
984 if (conflict && direntflags)
985 *direntflags = ED_CASE_CONFLICT;
989 mutex_enter(&sdp->sd_lock);
990 search.se_name = (char *)nm;
991 if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
994 err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
998 } else if (*vpp == sep->se_root) {
1000 * The snapshot was unmounted behind our backs,
1001 * try to remount it.
1006 * VROOT was set during the traverse call. We need
1007 * to clear it since we're pretending to be part
1008 * of our parent's vfs.
1010 (*vpp)->v_flag &= ~VROOT;
1012 mutex_exit(&sdp->sd_lock);
1018 * The requested snapshot is not currently mounted, look it up.
1020 err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
1022 mutex_exit(&sdp->sd_lock);
1025 * handle "ls *" or "?" in a graceful manner,
1026 * forcing EILSEQ to ENOENT.
1027 * Since shell ultimately passes "*" or "?" as name to lookup
1029 return (err == EILSEQ ? ENOENT : err);
1031 if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
1032 mutex_exit(&sdp->sd_lock);
1033 /* Translate errors and add SAVENAME when needed. */
1034 if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
1036 cnp->cn_flags |= SAVENAME;
1044 sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
1045 sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
1046 (void) strcpy(sep->se_name, nm);
1047 *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
1049 avl_insert(&sdp->sd_snaps, sep, where);
1051 dmu_objset_rele(snap, FTAG);
1053 mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
1054 strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(nm) + 1;
1055 mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
1056 (void) snprintf(mountpoint, mountpoint_len,
1057 "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
1058 dvp->v_vfsp->mnt_stat.f_mntonname, nm);
1059 err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0);
1060 kmem_free(mountpoint, mountpoint_len);
1063 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
1065 * This is where we lie about our v_vfsp in order to
1066 * make .zfs/snapshot/<snapname> accessible over NFS
1067 * without requiring manual mounts of <snapname>.
1069 ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
1070 VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
1072 mutex_exit(&sdp->sd_lock);
/*
 * VOP_LOOKUP handler for '.zfs/shares'.  The component name is copied into a
 * local NUL-terminated buffer and "." / ".." are handled by gfs_lookup_dot().
 * Other names are looked up with VOP_LOOKUP() in the vnode of the
 * filesystem's shares directory (z_shares_dir), obtained via zfs_zget().
 * The z_shares_dir == 0 branch and the cleanup are on lines not visible in
 * this excerpt.
 */
1081 zfsctl_shares_lookup(ap)
1082 struct vop_lookup_args /* {
1083 struct vnode *a_dvp;
1084 struct vnode **a_vpp;
1085 struct componentname *a_cnp;
1088 vnode_t *dvp = ap->a_dvp;
1089 vnode_t **vpp = ap->a_vpp;
1090 struct componentname *cnp = ap->a_cnp;
1091 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1092 char nm[NAME_MAX + 1];
1098 ASSERT(cnp->cn_namelen < sizeof(nm));
1099 strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
1101 if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
1106 if (zfsvfs->z_shares_dir == 0) {
1110 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
1111 error = VOP_LOOKUP(ZTOV(dzp), vpp, cnp);
/*
 * Readdir callback registered for '.zfs/snapshot' by zfsctl_mknode_snapdir().
 * Fetches the next snapshot of the filesystem's objset with
 * dmu_snapshot_list_next() and stores its name and inode number
 * (ZFSCTL_INO_SNAP(id)) into the entry at 'dp'.  With V_RDDIR_ENTFLAGS the
 * entry is an edirent_t and also receives ED_CASE_CONFLICT when applicable;
 * the other visible layout is struct dirent64.  Handling of ENOENT and of
 * the offset/eof outputs is on lines not visible in this excerpt.
 */
1121 zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
1122 offset_t *offp, offset_t *nextp, void *data, int flags)
1124 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1125 char snapname[MAXNAMELEN];
1126 uint64_t id, cookie;
1127 boolean_t case_conflict;
1133 error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
1134 &cookie, &case_conflict);
1137 if (error == ENOENT) {
1144 if (flags & V_RDDIR_ENTFLAGS) {
1145 edirent_t *eodp = dp;
1147 (void) strcpy(eodp->ed_name, snapname);
1148 eodp->ed_ino = ZFSCTL_INO_SNAP(id);
1149 eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
1151 struct dirent64 *odp = dp;
1153 (void) strcpy(odp->d_name, snapname);
1154 odp->d_ino = ZFSCTL_INO_SNAP(id);
/*
 * VOP_READDIR handler for '.zfs/shares'.  The znode of the filesystem's
 * shares directory is obtained with zfs_zget(); its vnode is locked shared,
 * read with VOP_READDIR() using the caller's uio, credentials, eof flag and
 * cookie arguments, and then released with VN_URELE().  The z_shares_dir == 0
 * branch and the return statements are on lines not visible in this excerpt.
 */
1165 zfsctl_shares_readdir(ap)
1166 struct vop_readdir_args /* {
1169 struct ucred *a_cred;
1175 vnode_t *vp = ap->a_vp;
1176 uio_t *uiop = ap->a_uio;
1177 cred_t *cr = ap->a_cred;
1178 int *eofp = ap->a_eofflag;
1179 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1185 if (zfsvfs->z_shares_dir == 0) {
1189 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1190 vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY);
1191 error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ap->a_ncookies, ap->a_cookies);
1192 VN_URELE(ZTOV(dzp));
1203 * pvp is the '.zfs' directory (zfsctl_node_t).
1204 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
1206 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
1207 * when a lookup is performed on .zfs for "snapshot".
/*
 * Creation callback for the "snapshot" entry of zfsctl_root_entries.
 * Creates the GFS directory vnode with zfsctl_snapdir_readdir_cb as its
 * readdir callback, sets its id to ZFSCTL_INO_SNAPDIR, copies the parent's
 * zc_cmtime, and initializes the sd_lock mutex and the sd_snaps AVL tree
 * (ordered by snapentry_compare).  The assignment of 'sdp' and the return are
 * on lines not visible in this excerpt.
 */
1210 zfsctl_mknode_snapdir(vnode_t *pvp)
1213 zfsctl_snapdir_t *sdp;
1215 vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, pvp->v_vfsp,
1216 &zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
1217 zfsctl_snapdir_readdir_cb, NULL);
1219 sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
1220 sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1221 mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
1222 avl_create(&sdp->sd_snaps, snapentry_compare,
1223 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
/*
 * Creation callback for the "shares" entry of zfsctl_root_entries.  Creates a
 * GFS directory vnode using zfsctl_ops_shares and copies the parent's
 * zc_cmtime into the new node.  The end of the gfs_dir_create() call, the
 * assignment of 'sdp' and the return are on lines not visible in this
 * excerpt.
 */
1229 zfsctl_mknode_shares(vnode_t *pvp)
1234 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
1235 &zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
1238 sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
/*
 * VOP_GETATTR handler for '.zfs/shares'.  The znode of the filesystem's
 * shares directory is obtained with zfs_zget(); its vnode is locked shared,
 * queried with VOP_GETATTR() and released with VN_URELE().  The
 * z_shares_dir == 0 branch and the return statements are on lines not
 * visible in this excerpt.
 */
1246 zfsctl_shares_getattr(ap)
1247 struct vop_getattr_args /* {
1249 struct vattr *a_vap;
1250 struct ucred *a_cred;
1251 struct thread *a_td;
1254 vnode_t *vp = ap->a_vp;
1255 vattr_t *vap = ap->a_vap;
1256 cred_t *cr = ap->a_cred;
1257 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1262 if (zfsvfs->z_shares_dir == 0) {
1266 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1267 vn_lock(ZTOV(dzp), LK_SHARED | LK_RETRY);
1268 error = VOP_GETATTR(ZTOV(dzp), vap, cr);
1269 VN_URELE(ZTOV(dzp));
/*
 * VOP_GETATTR handler for '.zfs/snapshot'.  Starts from
 * zfsctl_common_getattr(), then sets va_nodeid from gfs_file_inode(),
 * va_nlink and va_size to the number of entries in sd_snaps plus two, and
 * ctime, mtime and birthtime to dmu_objset_snap_cmtime() of the objset.
 */
1279 zfsctl_snapdir_getattr(ap)
1280 struct vop_getattr_args /* {
1282 struct vattr *a_vap;
1283 struct ucred *a_cred;
1286 vnode_t *vp = ap->a_vp;
1287 vattr_t *vap = ap->a_vap;
1288 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1289 zfsctl_snapdir_t *sdp = vp->v_data;
1292 zfsctl_common_getattr(vp, vap);
1293 vap->va_nodeid = gfs_file_inode(vp);
1294 vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
1295 vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1296 vap->va_birthtime = vap->va_ctime;
/*
 * VOP_INACTIVE handler for '.zfs/snapshot'.  Under sd_lock every remaining
 * entry is removed from sd_snaps and freed (name string and entry).  After
 * gfs_dir_inactive() the mutex and the AVL tree are destroyed and the
 * zfsctl_snapdir_t itself is freed.  Conditions surrounding these steps, if
 * any, are on lines not visible in this excerpt.
 */
1304 zfsctl_snapdir_inactive(ap)
1305 struct vop_inactive_args /* {
1307 struct thread *a_td;
1310 vnode_t *vp = ap->a_vp;
1311 zfsctl_snapdir_t *sdp = vp->v_data;
1312 zfs_snapentry_t *sep;
1315 * On forced unmount we have to free snapshots from here.
1317 mutex_enter(&sdp->sd_lock);
1318 while ((sep = avl_first(&sdp->sd_snaps)) != NULL) {
1319 avl_remove(&sdp->sd_snaps, sep);
1320 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1321 kmem_free(sep, sizeof (zfs_snapentry_t));
1323 mutex_exit(&sdp->sd_lock);
1324 gfs_dir_inactive(vp);
1325 ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
1326 mutex_destroy(&sdp->sd_lock);
1327 avl_destroy(&sdp->sd_snaps);
1328 kmem_free(sdp, sizeof (zfsctl_snapdir_t));
/*
 * Operation template for '.zfs/snapshot', referenced from zfsctl_opsvec.  In
 * addition to the common handlers it wires rename, rmdir and mkdir to the
 * zfsctl_snapdir_* functions and inactive to zfsctl_snapdir_inactive.
 */
1334 static const fs_operation_def_t zfsctl_tops_snapdir[] = {
1335 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
1336 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
1337 { VOPNAME_IOCTL, { .error = fs_inval } },
1338 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_snapdir_getattr } },
1339 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
1340 { VOPNAME_RENAME, { .vop_rename = zfsctl_snapdir_rename } },
1341 { VOPNAME_RMDIR, { .vop_rmdir = zfsctl_snapdir_remove } },
1342 { VOPNAME_MKDIR, { .vop_mkdir = zfsctl_snapdir_mkdir } },
1343 { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
1344 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_snapdir_lookup } },
1345 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
1346 { VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapdir_inactive } },
1347 { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
/*
 * Operation template for '.zfs/shares', referenced twice from zfsctl_opsvec.
 * getattr, readdir, lookup and fid use the zfsctl_shares_* handlers; the
 * rest use the common or generic handlers.
 */
1351 static const fs_operation_def_t zfsctl_tops_shares[] = {
1352 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
1353 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
1354 { VOPNAME_IOCTL, { .error = fs_inval } },
1355 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_shares_getattr } },
1356 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
1357 { VOPNAME_READDIR, { .vop_readdir = zfsctl_shares_readdir } },
1358 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_shares_lookup } },
1359 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
1360 { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
1361 { VOPNAME_FID, { .vop_fid = zfsctl_shares_fid } },
/*
 * vop_vector for the '.zfs/snapshot' vnode.  mkdir goes through the FreeBSD
 * wrapper zfsctl_freebsd_snapdir_mkdir(); no rename or rmdir handler is set
 * in the visible lines; default_vnodeops is named as the vop_default vector.
 */
1365 static struct vop_vector zfsctl_ops_snapdir = {
1366 .vop_default = &default_vnodeops,
1367 .vop_open = zfsctl_common_open,
1368 .vop_close = zfsctl_common_close,
1369 .vop_ioctl = VOP_EINVAL,
1370 .vop_getattr = zfsctl_snapdir_getattr,
1371 .vop_access = zfsctl_common_access,
1372 .vop_mkdir = zfsctl_freebsd_snapdir_mkdir,
1373 .vop_readdir = gfs_vop_readdir,
1374 .vop_lookup = zfsctl_snapdir_lookup,
1375 .vop_inactive = zfsctl_snapdir_inactive,
1376 .vop_reclaim = zfsctl_common_reclaim,
1377 .vop_fid = zfsctl_common_fid,
/*
 * vop_vector for the '.zfs/shares' vnode: getattr, readdir, lookup and fid
 * use the zfsctl_shares_* handlers; default_vnodeops is named as the
 * vop_default vector.
 */
1380 static struct vop_vector zfsctl_ops_shares = {
1381 .vop_default = &default_vnodeops,
1382 .vop_open = zfsctl_common_open,
1383 .vop_close = zfsctl_common_close,
1384 .vop_ioctl = VOP_EINVAL,
1385 .vop_getattr = zfsctl_shares_getattr,
1386 .vop_access = zfsctl_common_access,
1387 .vop_readdir = zfsctl_shares_readdir,
1388 .vop_lookup = zfsctl_shares_lookup,
1389 .vop_inactive = gfs_vop_inactive,
1390 .vop_reclaim = zfsctl_common_reclaim,
1391 .vop_fid = zfsctl_shares_fid,
1396 * pvp is the GFS vnode '.zfs/snapshot'.
1398 * This creates a GFS node under '.zfs/snapshot' representing each
1399 * snapshot. This newly created GFS node is what we mount snapshot
/*
 * Create the GFS mount-point vnode for one snapshot under 'pvp' using
 * zfsctl_ops_snapshot, and record 'objset' as the node's zc_id.  The
 * assignment of 'zcp' and the return are on lines not visible in this
 * excerpt.
 */
1403 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
1408 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
1409 &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
1412 zcp->zc_id = objset;
/*
 * VOP_INACTIVE handler for a snapshot mount-point node.  The parent snapshot
 * directory is found with gfs_dir_lookup(".."); sd_lock is taken unless the
 * current thread already holds it.  sd_snaps is scanned for the entry whose
 * se_root is this vnode, and that entry is removed and freed.  The vnode is
 * finally handed to gfs_vop_inactive().  The handling of v_count > 0, the
 * setup of 'sdp' and 'iap', and the unlock condition are on lines not
 * visible in this excerpt.
 */
1419 zfsctl_snapshot_inactive(ap)
1420 struct vop_inactive_args /* {
1422 struct thread *a_td;
1425 vnode_t *vp = ap->a_vp;
1426 cred_t *cr = ap->a_td->td_ucred;
1427 struct vop_inactive_args iap;
1428 zfsctl_snapdir_t *sdp;
1429 zfs_snapentry_t *sep, *next;
1433 if (vp->v_count > 0)
1436 VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
1440 if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
1441 mutex_enter(&sdp->sd_lock);
1443 ASSERT(!vn_ismntpt(vp));
1445 sep = avl_first(&sdp->sd_snaps);
1446 while (sep != NULL) {
1447 next = AVL_NEXT(&sdp->sd_snaps, sep);
1449 if (sep->se_root == vp) {
1450 avl_remove(&sdp->sd_snaps, sep);
1451 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1452 kmem_free(sep, sizeof (zfs_snapentry_t));
1457 ASSERT(sep != NULL);
1460 mutex_exit(&sdp->sd_lock);
1465 * Dispose of the vnode for the snapshot mount point.
1466 * This is safe to do because once this entry has been removed
1467 * from the AVL tree, it can't be found again, so cannot become
1468 * "active". If we lookup the same name again we will end up
1469 * creating a new vnode.
1472 return (gfs_vop_inactive(&iap));
/*
 * Replace *vpp (a snapshot mount-point vnode) with the root of the
 * filesystem mounted on it by calling traverse() with lock type 'lktype'.
 * The value returned when nothing is mounted on *vpp is not visible in this
 * excerpt.
 */
1476 zfsctl_traverse_begin(vnode_t **vpp, int lktype)
1480 /* Snapshot should be already mounted, but just in case. */
1481 if (vn_mountedvfs(*vpp) == NULL)
1483 return (traverse(vpp, lktype));
/*
 * Counterpart of zfsctl_traverse_begin(), called by the snapshot getattr and
 * fid handlers with the traversed vnode and the error so far.  Its body is
 * not visible in this excerpt.
 */
1487 zfsctl_traverse_end(vnode_t *vp, int err)
/*
 * VOP_GETATTR handler for a snapshot mount-point node: traverses to the
 * mounted root with a shared lock, forwards the request with VOP_GETATTR(),
 * and finishes with zfsctl_traverse_end().  The check of the traverse result
 * and the return are on lines not visible in this excerpt.
 */
1497 zfsctl_snapshot_getattr(ap)
1498 struct vop_getattr_args /* {
1500 struct vattr *a_vap;
1501 struct ucred *a_cred;
1504 vnode_t *vp = ap->a_vp;
1507 err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY);
1509 err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred);
1510 zfsctl_traverse_end(vp, err);
/*
 * VOP_FID handler for a snapshot mount-point node: traverses to the mounted
 * root with a shared lock, forwards the request with VOP_VPTOFH(), and
 * finishes with zfsctl_traverse_end().  The check of the traverse result and
 * the return are on lines not visible in this excerpt.
 */
1515 zfsctl_snapshot_fid(ap)
1516 struct vop_fid_args /* {
1521 vnode_t *vp = ap->a_vp;
1524 err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY);
1526 err = VOP_VPTOFH(vp, (void *)ap->a_fid);
1527 zfsctl_traverse_end(vp, err);
/*
 * VOP_LOOKUP handler for a snapshot mount-point node.  Names other than ".."
 * take a separate branch whose result is not visible in this excerpt.  For
 * ".." the '.zfs/snapshot' directory is looked up through
 * zfsctl_root_lookup() on z_ctldir, and the resulting vnode is locked
 * exclusively.  The condition on the vn_lock() call and the return are not
 * visible here.
 */
1532 zfsctl_snapshot_lookup(ap)
1533 struct vop_lookup_args /* {
1534 struct vnode *a_dvp;
1535 struct vnode **a_vpp;
1536 struct componentname *a_cnp;
1539 vnode_t *dvp = ap->a_dvp;
1540 vnode_t **vpp = ap->a_vpp;
1541 struct componentname *cnp = ap->a_cnp;
1542 cred_t *cr = ap->a_cnp->cn_cred;
1543 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
1546 if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' ||
1547 cnp->cn_nameptr[1] != '.') {
1551 ASSERT(dvp->v_type == VDIR);
1552 ASSERT(zfsvfs->z_ctldir != NULL);
1554 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp,
1555 NULL, 0, NULL, cr, NULL, NULL, NULL);
1557 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
/*
 * VOP_VPTOCNP handler for a snapshot mount-point node.  Looks up the
 * '.zfs/snapshot' directory, then walks sd_snaps under sd_lock.  For the
 * selected entry, *ap->a_buflen is decreased by the length of se_name and
 * the name is copied (without a terminating NUL) to ap->a_buf at the new
 * offset.  The match test inside the loop, the not-found path and any bounds
 * check on a_buflen are on lines not visible in this excerpt.
 */
1562 zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
1564 zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
1566 zfsctl_snapdir_t *sdp;
1567 zfs_snapentry_t *sep;
1570 ASSERT(zfsvfs->z_ctldir != NULL);
1571 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1572 NULL, 0, NULL, kcred, NULL, NULL, NULL);
1577 mutex_enter(&sdp->sd_lock);
1578 sep = avl_first(&sdp->sd_snaps);
1579 while (sep != NULL) {
1583 sep = AVL_NEXT(&sdp->sd_snaps, sep);
1586 mutex_exit(&sdp->sd_lock);
1591 len = strlen(sep->se_name);
1592 *ap->a_buflen -= len;
1593 bcopy(sep->se_name, ap->a_buf + *ap->a_buflen, len);
1594 mutex_exit(&sdp->sd_lock);
1604 * These VP's should never see the light of day. They should always
/*
 * vop_vector for the per-snapshot mount-point vnodes.  Only inactive,
 * lookup, reclaim, getattr, fid and vptocnp are set in the visible lines;
 * default_vnodeops is named as the vop_default vector.
 */
1607 static struct vop_vector zfsctl_ops_snapshot = {
1608 .vop_default = &default_vnodeops,
1609 .vop_inactive = zfsctl_snapshot_inactive,
1610 .vop_lookup = zfsctl_snapshot_lookup,
1611 .vop_reclaim = zfsctl_common_reclaim,
1612 .vop_getattr = zfsctl_snapshot_getattr,
1613 .vop_fid = zfsctl_snapshot_fid,
1614 .vop_vptocnp = zfsctl_snapshot_vptocnp,
/*
 * Find the mounted snapshot of 'vfsp' whose objset id is 'objsetid' and
 * return its zfsvfs_t in *zfsvfsp.  The '.zfs/snapshot' directory is looked
 * up, sd_snaps is scanned under sd_lock for a node whose zc_id equals
 * 'objsetid', and traverse() (shared lock) moves from the GFS mount point to
 * the root of the mounted filesystem.  The not-found path, the handling of
 * vp == sep->se_root and the return values are on lines not visible in this
 * excerpt.
 */
1618 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1620 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1622 zfsctl_snapdir_t *sdp;
1624 zfs_snapentry_t *sep;
1627 ASSERT(zfsvfs->z_ctldir != NULL);
1628 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1629 NULL, 0, NULL, kcred, NULL, NULL, NULL);
1634 mutex_enter(&sdp->sd_lock);
1635 sep = avl_first(&sdp->sd_snaps);
1636 while (sep != NULL) {
1639 if (zcp->zc_id == objsetid)
1642 sep = AVL_NEXT(&sdp->sd_snaps, sep);
1648 * Return the mounted root rather than the covered mount point.
1649 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
1650 * and returns the ZFS vnode mounted on top of the GFS node.
1651 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
1653 error = traverse(&vp, LK_SHARED | LK_RETRY);
1655 if (vp == sep->se_root)
1658 *zfsvfsp = VTOZ(vp)->z_zfsvfs;
1660 mutex_exit(&sdp->sd_lock);
1667 mutex_exit(&sdp->sd_lock);
1676 * Unmount any snapshots for the given filesystem. This is called from
1677 * zfs_umount() - if we have a ctldir, then go through and unmount all the snapshots.
/*
 * Unmount the snapshots mounted under the '.zfs/snapshot' directory of
 * 'vfsp'.  The directory is looked up through zfsctl_root_lookup(); under
 * sd_lock each sd_snaps entry whose se_root is a mount point is passed to
 * zfsctl_unmount_snap() with 'fflags' and 'cr'.  The successor is fetched
 * before the call because the entry may be freed by it.  The visible code
 * re-inserts an entry when it is absent from the tree; the conditions around
 * that step, the loop advance and the return value are on lines not visible
 * in this excerpt.
 */
1681 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1683 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1685 zfsctl_snapdir_t *sdp;
1686 zfs_snapentry_t *sep, *next;
1689 ASSERT(zfsvfs->z_ctldir != NULL);
1690 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1691 NULL, 0, NULL, cr, NULL, NULL, NULL);
1696 mutex_enter(&sdp->sd_lock);
1698 sep = avl_first(&sdp->sd_snaps);
1699 while (sep != NULL) {
1700 next = AVL_NEXT(&sdp->sd_snaps, sep);
1703 * If this snapshot is not mounted, then it must
1704 * have just been unmounted by somebody else, and
1705 * will be cleaned up by zfsctl_snapdir_inactive().
1707 if (vn_ismntpt(sep->se_root)) {
1708 error = zfsctl_unmount_snap(sep, fflags, cr);
1713 * Before reinserting snapshot to the tree,
1714 * check if it was actually removed. For example
1715 * when snapshot mount point is busy, we will
1716 * have an error here, but there will be no need
1717 * to reinsert snapshot.
1719 if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)
1720 avl_insert(&sdp->sd_snaps, sep, where);
1727 mutex_exit(&sdp->sd_lock);