4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 * under license from the Regents of the University of California.
37 #pragma ident "%Z%%M% %I% %E% SMI"
39 #include <sys/types.h>
40 #include <sys/t_lock.h>
42 #include <sys/vnode.h>
43 #include <sys/statvfs.h>
44 #include <sys/refstr.h>
52 * Data associated with mounted file systems.
56 * Operations vector. This is used internal to the kernel; file systems
57 * supply their list of operations via vfs_setfsops().
60 typedef struct vfsops vfsops_t;
63 * File system identifier. Should be unique (at least per machine).
66 int val[2]; /* file system id type */
70 * File identifier. Should be unique per filesystem on a single
71 * machine. This is typically called by a stateless file server
72 * in order to generate "file handles".
74 * Do not change the definition of struct fid ... fid_t without
75 * letting the CacheFS group know about it! They will have to do at
76 * least two things, in the same change that changes this structure:
77 * 1. change CFSVERSION in usr/src/uts/common/sys/fs/cachefs_fs.h
78 * 2. put the old version # in the canupgrade array
79 * in cachfs_upgrade() in usr/src/cmd/fs.d/cachefs/fsck/fsck.c
80 * This is necessary because CacheFS stores FIDs on disk.
82 * Many underlying file systems cast a struct fid into other
83 * file system dependent structures which may require 4 byte alignment.
84 * Because a fid starts with a short it may not be 4 byte aligned, the
85 * fid_pad will force the alignment.
88 #define OLD_MAXFIDSZ 16
94 ushort_t len; /* length of data in bytes */
95 char data[MAXFIDSZ]; /* data (variable len) */
102 * Solaris 64 - use old-style cache format with 32-bit aligned fid for on-disk
103 * struct compatibility.
105 typedef struct fid32 {
109 uint16_t len; /* length of data in bytes */
110 char data[MAXFIDSZ]; /* data (variable len) */
114 #else /* not _SYSCALL32 */
116 typedef fid_t fid32_t;
117 #endif /* _SYSCALL32 */
119 #define fid_len un._fid.len
120 #define fid_data un._fid.data
123 * Structure defining a mount option for a filesystem.
124 * option names are found in mntent.h
126 typedef struct mntopt {
127 char *mo_name; /* option name */
128 char **mo_cancel; /* list of options cancelled by this one */
129 char *mo_arg; /* argument string for this option */
130 int mo_flags; /* flags for this mount option */
131 void *mo_data; /* filesystem specific data */
135 * Flags that apply to mount options
/*
 * Bit values for the mo_flags member of struct mntopt.  MO_DEFAULT is not a
 * bit of its own: it expands to MO_SET, so an option declared "on by
 * default" carries exactly the same flag value as one that has been set.
 */
138 #define MO_SET 0x01 /* option is set */
139 #define MO_NODISPLAY 0x02 /* option not listed in mnttab */
140 #define MO_HASVALUE 0x04 /* option takes a value */
141 #define MO_IGNORE 0x08 /* option ignored by parser */
142 #define MO_DEFAULT MO_SET /* option is on by default (alias of MO_SET) */
143 #define MO_TAG 0x10 /* flags a tag set by user program */
144 #define MO_EMPTY 0x20 /* empty space in option table */
/*
 * A flag set distinct from the MO_* bits: the numeric values overlap, so the
 * two sets must not be mixed in one word.
 * NOTE(review): presumably these are the values accepted by the int flags
 * argument of vfs_setmntopt() -- confirm against the implementation.
 */
146 #define VFS_NOFORCEOPT 0x01 /* honor MO_IGNORE (don't set option) */
147 #define VFS_DISPLAY 0x02 /* Turn off MO_NODISPLAY bit for opt */
148 #define VFS_NODISPLAY 0x04 /* Turn on MO_NODISPLAY bit for opt */
149 #define VFS_CREATEOPT 0x08 /* Create the opt if it's not there */
152 * Structure holding mount option strings for the mounted file system.
154 typedef struct mntopts {
155 uint_t mo_count; /* number of entries in table */
156 mntopt_t *mo_list; /* list of mount options */
160 * The kstat structures associated with the vopstats are kept in an
161 * AVL tree. This is to avoid the case where a file system does not
162 * use a unique fsid_t for each vfs (e.g., namefs). In order to do
163 * this, we need a structure that the AVL tree can use that also
164 * references the kstat.
165 * Note that the vsk_fsid is generated from the value reported by
168 typedef struct vskstat_anchor {
169 avl_node_t vsk_node; /* Required for use by AVL routines */
170 kstat_t *vsk_ksp; /* kstat structure for vopstats */
171 ulong_t vsk_fsid; /* fsid associated w/this FS */
174 extern avl_tree_t vskstat_tree; /* AVL tree of vopstats kstat anchors */
175 extern kmutex_t vskstat_tree_lock; /* NOTE(review): presumably guards vskstat_tree -- confirm */
178 * Structure per mounted file system. Each mounted file system has
179 * an array of operations and an instance record.
181 * The file systems are kept on a doubly linked circular list headed by
183 * File system implementations should not access this list;
184 * it's intended for use only in the kernel's vfs layer.
186 * Each zone also has its own list of mounts, containing filesystems mounted
187 * somewhere within the filesystem tree rooted at the zone's rootpath. The
188 * list is doubly linked to match the global list.
190 * mnttab locking: the in-kernel mnttab uses the vfs_mntpt, vfs_resource and
191 * vfs_mntopts fields in the vfs_t. mntpt and resource are refstr_ts that
192 * are set at mount time and can only be modified during a remount.
193 * It is safe to read these fields if you can prevent a remount on the vfs,
194 * or through the convenience funcs vfs_getmntpoint() and vfs_getresource().
195 * The mntopts field may only be accessed through the provided convenience
196 * functions, as it is protected by the vfs list lock. Modifying a mount
197 * option requires grabbing the vfs list write lock, which can be a very
200 struct zone; /* from zone.h */
201 struct fem_head; /* from fem.h */
204 * Private vfs data, NOT to be used by a file system implementation.
206 typedef struct vfs_impl {
207 struct fem_head *vi_femhead; /* fs monitoring */
209 * Support for statistics on the vnode operations
211 vsk_anchor_t *vi_vskap; /* anchor for vopstats' kstat */
212 vopstats_t *vi_fstypevsp; /* ptr to per-fstype vopstats */
213 vopstats_t vi_vopstats; /* per-mount vnode op stats */
217 struct vfs *vfs_next; /* next VFS in VFS list */
218 struct vfs *vfs_prev; /* prev VFS in VFS list */
220 /* vfs_op should not be used directly. Accessor functions are provided */
221 vfsops_t *vfs_op; /* operations on VFS */
223 struct vnode *vfs_vnodecovered; /* vnode mounted on */
224 uint_t vfs_flag; /* flags */
225 uint_t vfs_bsize; /* native block size */
226 int vfs_fstype; /* file system type index */
227 fsid_t vfs_fsid; /* file system id */
228 void *vfs_data; /* private data */
229 dev_t vfs_dev; /* device of mounted VFS */
230 ulong_t vfs_bcount; /* I/O count (accounting) */
231 struct vfs *vfs_list; /* sync list pointer */
232 struct vfs *vfs_hash; /* hash list pointer */
233 ksema_t vfs_reflock; /* mount/unmount/sync lock */
234 uint_t vfs_count; /* vfs reference count */
235 mntopts_t vfs_mntopts; /* options mounted with */
236 refstr_t *vfs_resource; /* mounted resource name */
237 refstr_t *vfs_mntpt; /* mount point name */
238 time_t vfs_mtime; /* time we were mounted */
239 vfs_impl_t *vfs_implp; /* impl specific data */
241 * Zones support. Note that the zone that "owns" the mount isn't
242 * necessarily the same as the zone in which the mount is visible.
243 * That is, vfs_zone and (vfs_zone_next|vfs_zone_prev) may refer to
246 struct zone *vfs_zone; /* zone that owns the mount */
247 struct vfs *vfs_zone_next; /* next VFS visible in zone */
248 struct vfs *vfs_zone_prev; /* prev VFS visible in zone */
/*
 * Shorthand for the members of the vfs_impl that a vfs reaches through its
 * vfs_implp pointer.  Each name expands to "vfs_implp-><member>", so it is
 * only meaningful as the member part of an access on a vfs: for example
 * vfsp->vfs_femhead becomes vfsp->vfs_implp->vi_femhead.  Every use
 * therefore dereferences vfs_implp.
 */
251 #define vfs_femhead vfs_implp->vi_femhead
252 #define vfs_vskap vfs_implp->vi_vskap
253 #define vfs_fstypevsp vfs_implp->vi_fstypevsp
254 #define vfs_vopstats vfs_implp->vi_vopstats
/*
 * Per-vfs flag bits.  Bit 0x04 is not assigned in this list.
 * NOTE(review): presumably these are the values stored in the vfs_flag
 * member of struct vfs -- confirm.
 */
259 #define VFS_RDONLY 0x01 /* read-only vfs */
260 #define VFS_NOMNTTAB 0x02 /* vfs not seen in mnttab */
261 #define VFS_NOSETUID 0x08 /* setuid disallowed */
262 #define VFS_REMOUNT 0x10 /* modify mount options only */
263 #define VFS_NOTRUNC 0x20 /* does not truncate long file names */
264 #define VFS_UNLINKABLE 0x40 /* unlink(2) can be applied to root */
265 #define VFS_PXFS 0x80 /* clustering: global fs proxy vfs */
266 #define VFS_UNMOUNTED 0x100 /* file system has been unmounted */
267 #define VFS_NBMAND 0x200 /* allow non-blocking mandatory locks */
268 #define VFS_XATTR 0x400 /* fs supports extended attributes */
269 #define VFS_NODEVICES 0x800 /* device-special files disallowed */
270 #define VFS_NOEXEC 0x1000 /* executables disallowed */
271 #define VFS_STATS 0x2000 /* file system can collect stats */
/*
 * Placeholder names.
 * NOTE(review): presumably substituted for vfs_resource and vfs_mntpt when
 * no name was supplied at mount time -- confirm against the vfs layer.
 */
273 #define VFS_NORESOURCE "unspecified_resource"
274 #define VFS_NOMNTPT "unspecified_mountpoint"
277 * Argument structure for mount(2).
279 * Flags are defined in <sys/mount.h>.
281 * Note that if the MS_SYSSPACE bit is set in flags, the pointer fields in
282 * this structure are to be interpreted as kernel addresses. File systems
283 * should be prepared for this possibility.
297 * Reasons for calling the vfs_mountroot() operation.
/*
 * Passed as the second argument of the vfs_mountroot operation and of
 * fsop_mountroot().  No explicit values are given, so the enumerators are
 * 0, 1 and 2 in the order listed.
 */
299 enum whymountroot { ROOT_INIT, ROOT_REMOUNT, ROOT_UNMOUNT};
300 typedef enum whymountroot whymountroot_t;
303 * Reasons for calling the VFS_VNSTATE():
311 typedef enum vntrans vntrans_t;
314 * VFS_OPS defines all the vfs operations. It is used to define
315 * the vfsops structure (below) and the fs_func_p union (vfs_opreg.h).
318 int (*vfs_mount)(vfs_t *, vnode_t *, struct mounta *, cred_t *); \
319 int (*vfs_unmount)(vfs_t *, int, cred_t *); \
320 int (*vfs_root)(vfs_t *, vnode_t **); \
321 int (*vfs_statvfs)(vfs_t *, statvfs64_t *); \
322 int (*vfs_sync)(vfs_t *, short, cred_t *); \
323 int (*vfs_vget)(vfs_t *, vnode_t **, fid_t *); \
324 int (*vfs_mountroot)(vfs_t *, enum whymountroot); \
325 void (*vfs_freevfs)(vfs_t *); \
326 int (*vfs_vnstate)(vfs_t *, vnode_t *, vntrans_t) /* NB: No ";" */
329 * Operations supported on virtual file system.
332 VFS_OPS; /* Signature of all vfs operations (vfsops) */
/*
 * Functions that the VFS_*() macros expand to.  Except for
 * fsop_sync_by_kind(), which has no macro or VFS_OPS member of its own,
 * each prototype has the same parameter list as the matching VFS_OPS
 * member.  Two names differ from their operation: fsop_statfs() pairs
 * with vfs_statvfs / VFS_STATVFS(), and fsop_freefs() pairs with
 * vfs_freevfs / VFS_FREEVFS().
 */
335 extern int fsop_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
336 extern int fsop_unmount(vfs_t *, int, cred_t *);
337 extern int fsop_root(vfs_t *, vnode_t **);
338 extern int fsop_statfs(vfs_t *, statvfs64_t *);
339 extern int fsop_sync(vfs_t *, short, cred_t *);
340 extern int fsop_vget(vfs_t *, vnode_t **, fid_t *);
341 extern int fsop_mountroot(vfs_t *, enum whymountroot);
342 extern void fsop_freefs(vfs_t *);
343 extern int fsop_sync_by_kind(int, short, cred_t *);
344 extern int fsop_vnstate(vfs_t *, vnode_t *, vntrans_t);
/*
 * One macro per vfs operation.  Each forwards its arguments, unchanged and
 * in the same order, to the corresponding fsop_*() function.  Note the two
 * name mismatches: VFS_STATVFS() calls fsop_statfs() and VFS_FREEVFS()
 * calls fsop_freefs().
 */
346 #define VFS_MOUNT(vfsp, mvp, uap, cr) fsop_mount(vfsp, mvp, uap, cr)
347 #define VFS_UNMOUNT(vfsp, flag, cr) fsop_unmount(vfsp, flag, cr)
348 #define VFS_ROOT(vfsp, vpp) fsop_root(vfsp, vpp)
349 #define VFS_STATVFS(vfsp, sp) fsop_statfs(vfsp, sp)
350 #define VFS_SYNC(vfsp, flag, cr) fsop_sync(vfsp, flag, cr)
351 #define VFS_VGET(vfsp, vpp, fidp) fsop_vget(vfsp, vpp, fidp)
352 #define VFS_MOUNTROOT(vfsp, init) fsop_mountroot(vfsp, init)
353 #define VFS_FREEVFS(vfsp) fsop_freefs(vfsp)
354 #define VFS_VNSTATE(vfsp, vn, ns) fsop_vnstate(vfsp, vn, ns)
/*
 * String name of each vfs operation: the VFS_OPS member name with its
 * "vfs_" prefix removed.
 * NOTE(review): presumably the names a file system uses when supplying its
 * operations through vfs_setfsops() -- confirm in vfs_opreg.h.
 */
356 #define VFSNAME_MOUNT "mount"
357 #define VFSNAME_UNMOUNT "unmount"
358 #define VFSNAME_ROOT "root"
359 #define VFSNAME_STATVFS "statvfs"
360 #define VFSNAME_SYNC "sync"
361 #define VFSNAME_VGET "vget"
362 #define VFSNAME_MOUNTROOT "mountroot"
363 #define VFSNAME_FREEVFS "freevfs"
364 #define VFSNAME_VNSTATE "vnstate"
366 * Filesystem type switch table.
369 typedef struct vfssw {
370 char *vsw_name; /* type name -- max len _ST_FSTYPSZ */
371 int (*vsw_init) (int, char *);
372 /* init routine (for non-loadable fs only) */
373 int vsw_flag; /* flags */
374 mntopts_t vsw_optproto; /* mount options table prototype */
375 uint_t vsw_count; /* count of references */
376 kmutex_t vsw_lock; /* lock to protect vsw_count */
377 vfsops_t vsw_vfsops; /* filesystem operations vector */
381 * Filesystem type definition record. All file systems must export a record
382 * of this type through their modlfs structure.
385 typedef struct vfsdef_v3 {
386 int def_version; /* structure version, must be first */
387 char *name; /* filesystem type name */
388 int (*init) (int, char *); /* init routine */
389 int flags; /* filesystem flags */
390 mntopts_t *optproto; /* mount options table prototype */
393 typedef struct vfsdef_v3 vfsdef_t;
400 * flags for vfssw and vfsdef
402 #define VSW_HASPROTO 0x01 /* struct has a mount options prototype */
403 #define VSW_CANRWRO 0x02 /* file system can transition from rw to ro */
404 #define VSW_CANREMOUNT 0x04 /* file system supports remounts */
405 #define VSW_NOTZONESAFE 0x08 /* zone_enter(2) should fail for these files */
406 #define VSW_VOLATILEDEV 0x10 /* vfs_dev can change each time fs is mounted */
407 #define VSW_STATS 0x20 /* file system can collect stats */
/* VSW_INSTALLED is the bit that VFS_INSTALLED() tests in vsw_flag. */
409 #define VSW_INSTALLED 0x8000 /* this vsw is associated with a file system */
/*
 * Operations-vector management and vfs initialisation.  vfs_init() followed
 * by vfsimpl_setup() is the sequence performed by the VFS_INIT() macro.
 * NOTE(review): vfs_setops()/vfs_getops() are presumably the accessor
 * functions that the comment on the vfs_op member refers to -- confirm.
 */
419 void vfs_freevfsops(vfsops_t *);
420 int vfs_freevfsops_by_type(int);
421 void vfs_setops(vfs_t *, vfsops_t *);
422 vfsops_t *vfs_getops(vfs_t *vfsp);
423 int vfs_matchops(vfs_t *, vfsops_t *);
424 int vfs_can_sync(vfs_t *vfsp);
425 void vfs_init(vfs_t *vfsp, vfsops_t *, void *);
426 void vfsimpl_setup(vfs_t *vfsp);
427 void vfsimpl_teardown(vfs_t *vfsp);
/*
 * NOTE(review): presumably the vnode state-change notifications that are
 * delivered to the file system through VFS_VNSTATE() -- confirm.
 */
428 void vn_exists(vnode_t *);
429 void vn_idle(vnode_t *);
430 void vn_reclaim(vnode_t *);
431 void vn_invalid(vnode_t *);
434 int svm_rootconf(void);
435 int domount(char *, struct mounta *, vnode_t *, struct cred *,
437 int dounmount(struct vfs *, int, cred_t *);
/*
 * Per-vfs lock interface; every function takes the vfs to operate on.
 * NOTE(review): presumably implemented on vfs_reflock, which struct vfs
 * describes as the mount/unmount/sync lock -- confirm.
 */
438 int vfs_lock(struct vfs *);
439 int vfs_rlock(struct vfs *);
440 void vfs_lock_wait(struct vfs *);
441 void vfs_rlock_wait(struct vfs *);
442 void vfs_unlock(struct vfs *);
443 int vfs_lock_held(struct vfs *);
444 struct _kthread *vfs_lock_owner(struct vfs *);
447 void vfs_mountroot(void);
448 void vfs_add(vnode_t *, struct vfs *, int);
449 void vfs_remove(struct vfs *);
451 /* The following functions are not for general use by filesystems */
453 void vfs_createopttbl(mntopts_t *, const char *);
454 void vfs_copyopttbl(const mntopts_t *, mntopts_t *);
455 void vfs_mergeopttbl(const mntopts_t *, const mntopts_t *, mntopts_t *);
456 void vfs_freeopttbl(mntopts_t *);
457 void vfs_parsemntopts(mntopts_t *, char *, int);
458 int vfs_buildoptionstr(const mntopts_t *, char *, int);
459 struct mntopt *vfs_hasopt(const mntopts_t *, const char *);
460 void vfs_mnttab_modtimeupd(void);
/*
 * Functions operating on the mount options, resource name and mount point
 * of a vfs.  Per the mnttab locking notes on struct vfs, vfs_mntopts may
 * only be accessed through the provided convenience functions, and
 * vfs_getmntpoint()/vfs_getresource() are the convenience functions for
 * reading vfs_mntpt and vfs_resource.
 * NOTE(review): the refstr_t returned by the two getters presumably carries
 * a hold that the caller must release -- confirm.
 */
462 void vfs_clearmntopt(struct vfs *, const char *);
463 void vfs_setmntopt(struct vfs *, const char *, const char *, int);
464 void vfs_setresource(struct vfs *, const char *);
465 void vfs_setmntpoint(struct vfs *, const char *);
466 refstr_t *vfs_getresource(const struct vfs *);
467 refstr_t *vfs_getmntpoint(const struct vfs *);
468 int vfs_optionisset(const struct vfs *, const char *, char **);
469 int vfs_settag(uint_t, uint_t, const char *, const char *, cred_t *);
470 int vfs_clrtag(uint_t, uint_t, const char *, const char *, cred_t *);
471 void vfs_syncall(void);
472 void vfs_syncprogress(void);
474 void vfs_unmountall(void);
475 void vfs_make_fsid(fsid_t *, dev_t, int);
476 void vfs_addmip(dev_t, struct vfs *);
477 void vfs_delmip(struct vfs *);
478 int vfs_devismounted(dev_t);
479 int vfs_devmounting(dev_t, struct vfs *);
480 int vfs_opsinuse(vfsops_t *);
/* Lookups that return a struct vfs * or a struct vfssw *. */
481 struct vfs *getvfs(fsid_t *);
482 struct vfs *vfs_dev2vfsp(dev_t);
483 struct vfs *vfs_mntpoint2vfsp(const char *);
484 struct vfssw *allocate_vfssw(char *);
485 struct vfssw *vfs_getvfssw(char *);
486 struct vfssw *vfs_getvfsswbyname(char *);
487 struct vfssw *vfs_getvfsswbyvfsops(vfsops_t *);
/*
 * NOTE(review): presumably adjust vsw_count, the vfssw reference count
 * that vsw_lock protects -- confirm.
 */
488 void vfs_refvfssw(struct vfssw *);
489 void vfs_unrefvfssw(struct vfssw *);
490 uint_t vf_to_stf(uint_t);
491 void vfs_mnttab_modtime(timespec_t *);
492 void vfs_mnttab_poll(timespec_t *, struct pollhead **);
/*
 * vfs list locking and list membership.  The mnttab locking notes on
 * struct vfs state that the mount options are protected by the vfs list
 * lock and that modifying one requires the write lock.
 * NOTE(review): vfs_list_lock() is presumably the writer form and
 * vfs_list_read_lock() the reader form -- confirm.
 */
494 void vfs_list_lock(void);
495 void vfs_list_read_lock(void);
496 void vfs_list_unlock(void);
497 void vfs_list_add(struct vfs *);
498 void vfs_list_remove(struct vfs *);
/*
 * NOTE(review): presumably take and drop a reference counted in
 * vfs_count -- confirm.
 */
499 void vfs_hold(vfs_t *vfsp);
500 void vfs_rele(vfs_t *vfsp);
501 void fs_freevfs(vfs_t *);
502 void vfs_root_redev(vfs_t *vfsp, dev_t ndev, int fstype);
504 int vfs_zone_change_safe(vfs_t *);
/*
 * VFSHASH(maj, min): adds maj and min, converts the sum to int and masks
 * it with (vfshsz - 1).
 * NOTE(review): the mask reaches every index below vfshsz only if vfshsz is
 * a power of two -- confirm where vfshsz is set.
 *
 * VFS_ON_LIST(vfsp): true when vfs_next is neither NULL nor the vfs
 * itself.  The argument is evaluated twice, so it must not have side
 * effects.
 */
506 #define VFSHASH(maj, min) (((int)((maj)+(min))) & (vfshsz - 1))
507 #define VFS_ON_LIST(vfsp) \
508 ((vfsp)->vfs_next != (vfsp) && (vfsp)->vfs_next != NULL)
514 extern struct vfssw vfssw[]; /* table of filesystem types */
515 extern krwlock_t vfssw_lock; /* taken by RLOCK_VFSSW()/WLOCK_VFSSW() */
516 extern char rootfstype[]; /* name of root fstype */
517 extern const int nfstype; /* # of elements in vfssw array */
518 extern vfsops_t *EIO_vfsops; /* operations for vfs being torn-down */
521 * The following variables are private to the kernel's vfs layer. File
522 * system implementations should not access them.
524 extern struct vfs *rootvfs; /* ptr to root vfs structure */
526 struct vfs *rvfs_head; /* head vfs in chain */
527 kmutex_t rvfs_lock; /* mutex protecting this chain */
528 uint32_t rvfs_len; /* length of this chain */
/* NOTE(review): rvfs_list presumably points to vfshsz rvfs_t entries -- confirm. */
530 extern rvfs_t *rvfs_list;
/* vfshsz is also the value VFSHASH() derives its mask from (vfshsz - 1). */
531 extern int vfshsz; /* # of elements in rvfs_head array */
532 extern const mntopts_t vfs_mntopts; /* globally recognized options */
534 #endif /* defined(_KERNEL) */
536 #define VFS_HOLD(vfsp) { \
540 #define VFS_RELE(vfsp) { \
544 #define VFS_INIT(vfsp, op, data) { \
545 vfs_init((vfsp), (op), (data)); \
546 vfsimpl_setup((vfsp)); \
/*
 * Helpers for vfssw entries and for vfssw_lock.
 *
 * VFS_INSTALLED() tests the VSW_INSTALLED bit of vsw_flag.
 * ALLOCATED_VFSSW() is true when vsw_name is a non-empty string; it reads
 * vsw_name[0], so vsw_name must not be NULL.
 *
 * RLOCK_VFSSW()/WLOCK_VFSSW() enter vfssw_lock as reader/writer.  The two
 * unlock macros expand to the same rw_exit() call.  VFSSW_LOCKED() and
 * VFSSW_WRITE_LOCKED() wrap RW_LOCK_HELD() and RW_WRITE_HELD().
 */
550 #define VFS_INSTALLED(vfsswp) (((vfsswp)->vsw_flag & VSW_INSTALLED) != 0)
551 #define ALLOCATED_VFSSW(vswp) ((vswp)->vsw_name[0] != '\0')
552 #define RLOCK_VFSSW() (rw_enter(&vfssw_lock, RW_READER))
553 #define RUNLOCK_VFSSW() (rw_exit(&vfssw_lock))
554 #define WLOCK_VFSSW() (rw_enter(&vfssw_lock, RW_WRITER))
555 #define WUNLOCK_VFSSW() (rw_exit(&vfssw_lock))
556 #define VFSSW_LOCKED() (RW_LOCK_HELD(&vfssw_lock))
557 #define VFSSW_WRITE_LOCKED() (RW_WRITE_HELD(&vfssw_lock))
/*
 * NOTE(review): presumably the bit values for the short flag argument of
 * the vfs_sync operation (VFS_SYNC() / fsop_sync()) -- confirm.
 */
561 #define SYNC_ATTR 0x01 /* sync attributes only */
562 #define SYNC_CLOSE 0x02 /* close open file */
563 #define SYNC_ALL 0x04 /* force to sync all fs */
569 #endif /* _SYS_VFS_H */