2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)vnode.h 8.7 (Berkeley) 2/4/94
40 #include <sys/queue.h>
41 #include <sys/select.h>
44 #include <machine/lock.h>
47 * The vnode is the focus of all file activity in UNIX. There is a
48 * unique vnode allocated for each active file, each current directory,
49 * each mounted-on file, text file, and the root.
53 * Vnode types. VNON means no type.
/*
 * Vnode type enumeration.  Enumerators take consecutive values starting
 * at 0, so VNON ("no type") is 0 and VBAD is the last value.
 */
enum vtype {
	VNON,
	VREG,
	VDIR,
	VBLK,
	VCHR,
	VLNK,
	VSOCK,
	VFIFO,
	VBAD
};
59 * These are for the benefit of external programs only (e.g., pstat)
60 * and should NEVER be inspected by the kernel.
63 VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
64 VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
65 VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS, VT_CODA, VT_NTFS
69 * Each underlying filesystem allocates its own private area and hangs
70 * it from v_data. If non-null, this area is freed in getnewvnode().
72 TAILQ_HEAD(buflists, buf);
74 typedef int vop_t __P((void *));
78 * Reading or writing any of these items requires holding the appropriate lock.
79 * v_freelist is locked by the global vnode_free_list simple lock.
80 * v_mntvnodes is locked by the global mntvnode_slock simple lock.
81 * v_flag, v_usecount, v_holdcnt and v_writecount are
82 * locked by the v_interlock simple lock.
83 * v_pollinfo is locked by the lock contained inside it.
86 u_long v_flag; /* vnode flags (see below) */
87 int v_usecount; /* reference count of users */
88 int v_writecount; /* reference count of writers */
89 int v_holdcnt; /* page & buffer references */
90 daddr_t v_lastr; /* last read (read-ahead) */
91 u_long v_id; /* capability identifier */
92 struct mount *v_mount; /* ptr to vfs we are in */
93 vop_t **v_op; /* vnode operations vector */
94 TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
95 LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */
96 struct buflists v_cleanblkhd; /* clean blocklist head */
97 struct buflists v_dirtyblkhd; /* dirty blocklist head */
98 LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */
99 long v_numoutput; /* num of writes in progress */
100 enum vtype v_type; /* vnode type */
102 struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
103 struct socket *vu_socket; /* unix ipc (VSOCK) */
105 struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */
106 SLIST_ENTRY(vnode) vu_specnext;
108 struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
110 struct nqlease *v_lease; /* Soft reference to lease */
111 daddr_t v_lastw; /* last write (write cluster) */
112 daddr_t v_cstart; /* start block of cluster */
113 daddr_t v_lasta; /* last allocation */
114 int v_clen; /* length of current cluster */
115 int v_maxio; /* maximum I/O cluster size */
116 struct vm_object *v_object; /* Place to store VM object */
117 struct simplelock v_interlock; /* lock on usecount and flag */
118 struct lock *v_vnlock; /* used for non-locking fs's */
119 enum vtagtype v_tag; /* type of underlying data */
120 void *v_data; /* private data for fs */
121 LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */
122 TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */
123 struct vnode *v_dd; /* .. vnode */
124 u_long v_ddid; /* .. capability identifier */
126 struct simplelock vpi_lock; /* lock to protect below */
127 struct selinfo vpi_selinfo; /* identity of poller(s) */
128 short vpi_events; /* what they are looking for */
129 short vpi_revents; /* what has happened */
132 const char *filename; /* Source file doing locking */
133 int line; /* Line number doing locking */
/*
 * Short field names for the members reached through v_un, so that code
 * can write vp->v_socket rather than vp->v_un.vu_socket.  The two
 * device-related members sit one level deeper, inside v_un.vu_spec.
 */
#define v_mountedhere   v_un.vu_mountedhere
#define v_socket        v_un.vu_socket
#define v_rdev          v_un.vu_spec.vu_specinfo
#define v_specnext      v_un.vu_spec.vu_specnext
#define v_fifoinfo      v_un.vu_fifoinfo
142 #define VN_POLLEVENT(vp, events) \
144 if ((vp)->v_pollinfo.vpi_events & (events)) \
145 vn_pollevent((vp), (events)); \
/*
 * Bit values for the vnode v_flag word.  Values are written with a
 * uniform width; bits marked "unused" have no name assigned here.
 */
#define VROOT       0x000001    /* root of its file system */
#define VTEXT       0x000002    /* vnode is a pure text prototype */
#define VSYSTEM     0x000004    /* vnode being used by kernel */
#define VISTTY      0x000008    /* vnode represents a tty */
#define VXLOCK      0x000100    /* vnode is locked to change underlying type */
#define VXWANT      0x000200    /* process is waiting for vnode */
#define VBWAIT      0x000400    /* waiting for output to complete */
/* unused           0x000800 */
/* unused           0x001000 */
#define VOBJBUF     0x002000    /* Allocate buffers in VM object */
/* unused           0x004000 */
#define VAGE        0x008000    /* Insert vnode at head of free list */
#define VOLOCK      0x010000    /* vnode is locked waiting for an object */
#define VOWANT      0x020000    /* a process is waiting for VOLOCK */
#define VDOOMED     0x040000    /* This vnode is being recycled */
#define VFREE       0x080000    /* This vnode is on the freelist */
#define VTBFREE     0x100000    /* This vnode is on the to-be-freelist */
#define VONWORKLST  0x200000    /* On syncer work-list */
#define VMOUNT      0x400000    /* Mount in progress */
172 * Vnode attributes. A field value of VNOVAL represents a field whose value
173 * is unavailable (getattr) or which is not to be changed (setattr).
176 enum vtype va_type; /* vnode type (for create) */
177 u_short va_mode; /* files access mode and type */
178 short va_nlink; /* number of references to file */
179 uid_t va_uid; /* owner user id */
180 gid_t va_gid; /* owner group id */
181 udev_t va_fsid; /* file system id */
182 long va_fileid; /* file id */
183 u_quad_t va_size; /* file size in bytes */
184 long va_blocksize; /* blocksize preferred for i/o */
185 struct timespec va_atime; /* time of last access */
186 struct timespec va_mtime; /* time of last modification */
187 struct timespec va_ctime; /* time file changed */
188 u_long va_gen; /* generation number of file */
189 u_long va_flags; /* flags defined for file */
190 udev_t va_rdev; /* device the special file represents */
191 u_quad_t va_bytes; /* bytes of disk space held by file */
192 u_quad_t va_filerev; /* file modification number */
193 u_int va_vaflags; /* operations flags, see below */
194 long va_spare; /* remain quad aligned */
198 * Flags for va_vaflags.
/* Bit values for the va_vaflags field. */
#define VA_UTIMES_NULL  0x01    /* utimes argument was NULL */
#define VA_EXCLUSIVE    0x02    /* exclusive create request */
/* I/O request flag bits; each name occupies its own bit. */
#define IO_UNIT         0x01    /* do I/O as atomic unit */
#define IO_APPEND       0x02    /* append write to end */
#define IO_SYNC         0x04    /* do I/O synchronously */
#define IO_NODELOCKED   0x08    /* underlying node already locked */
#define IO_NDELAY       0x10    /* FNDELAY flag set in file table */
#define IO_VMIO         0x20    /* data already in VMIO space */
#define IO_INVAL        0x40    /* invalidate after I/O */
215 * Modes. Some values same as Ixxx entries from inode.h for now.
217 #define VSUID 04000 /* set user id on execution */
218 #define VSGID 02000 /* set group id on execution */
219 #define VSVTX 01000 /* save swapped text even after use */
220 #define VREAD 00400 /* read, write, execute permissions */
225 * Token indicating no attribute value yet assigned.
231 #ifdef MALLOC_DECLARE
232 MALLOC_DECLARE(M_VNODE);
236 * Convert between vnode types and inode formats (since POSIX.1
237 * defines mode word of stat structure in terms of inode formats).
239 extern enum vtype iftovt_tab[];
240 extern int vttoif_tab[];
/*
 * IFTOVT:    index iftovt_tab with the S_IFMT field of a mode word,
 *            shifted right by 12 bits.
 * VTTOIF:    index vttoif_tab with a vnode type (cast to int).
 * MAKEIMODE: OR the VTTOIF() result with the supplied mode bits and
 *            yield the result as an int.
 *
 * MAKEIMODE's expansion is wrapped in an outer pair of parentheses so
 * that it is a single primary expression; without them a use such as
 * "sizeof MAKEIMODE(t, m)" does not parse.
 */
#define IFTOVT(mode)            (iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)            (vttoif_tab[(int)(indx)])
#define MAKEIMODE(indx, mode)   ((int)(VTTOIF(indx) | (mode)))
246 * Flags to various vnode functions.
/*
 * Flag values, grouped by the routine named in each comment.  The
 * groups are independent, so values repeat between them.
 */
#define SKIPSYSTEM  0x0001      /* vflush: skip vnodes marked VSYSTEM */
#define FORCECLOSE  0x0002      /* vflush: force file closure */
#define WRITECLOSE  0x0004      /* vflush: only close writable files */
#define DOCLOSE     0x0008      /* vclean: close active files */
#define V_SAVE      0x0001      /* vinvalbuf: sync file first */
#define REVOKEALL   0x0001      /* vop_revoke: revoke all aliases */
255 #define VREF(vp) vref(vp)
259 #define VATTR_NULL(vap) vattr_null(vap)
261 #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
262 #endif /* DIAGNOSTIC */
264 #define NULLVP ((struct vnode *)NULL)
/*
 * Expands to two entries for the object named by the argument:
 *   - a C_SYSINIT entry called <name>init that names vfs_add_vnodeops,
 *   - a C_SYSUNINIT entry called <name>uninit that names vfs_rm_vnodeops.
 * Both use SI_SUB_VFS / SI_ORDER_SECOND and are handed the address of
 * the object.  The expansion ends in a semicolon, as before.
 */
#define VNODEOP_SET(opv)                                                \
	C_SYSINIT(opv##init, SI_SUB_VFS, SI_ORDER_SECOND,               \
	    vfs_add_vnodeops, &opv);                                    \
	C_SYSUNINIT(opv##uninit, SI_SUB_VFS, SI_ORDER_SECOND,           \
	    vfs_rm_vnodeops, &opv);
273 extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
274 extern int desiredvnodes; /* number of vnodes desired */
275 extern time_t syncdelay; /* max time to delay syncing data */
276 extern time_t filedelay; /* time to delay syncing files */
277 extern time_t dirdelay; /* time to delay syncing directories */
278 extern time_t metadelay; /* time to delay syncing metadata */
279 extern struct vm_zone *namei_zone;
280 extern int prtactive; /* nonzero to call vprint() */
281 extern struct vattr va_null; /* predefined null vattr structure */
282 extern int vfs_ioopt;
285 * Macro/function to check for client cache inconsistency w.r.t. leasing.
287 #define LEASE_READ 0x1 /* Check lease for readers */
288 #define LEASE_WRITE 0x2 /* Check lease for modifiers */
291 extern void (*lease_updatetime) __P((int deltat));
/*
 * Non-zero when all of the following hold for vp:
 *   - neither VFREE nor VDOOMED is set in v_flag,
 *   - v_holdcnt and v_usecount are both zero,
 *   - v_object is null, or its ref_count and resident_page_count are
 *     both zero.
 * The argument is evaluated more than once.
 */
#define VSHOULDFREE(vp)                                                 \
	(((vp)->v_flag & (VFREE | VDOOMED)) == 0 &&                     \
	 (vp)->v_holdcnt == 0 && (vp)->v_usecount == 0 &&               \
	 (!(vp)->v_object ||                                            \
	  ((vp)->v_object->ref_count == 0 &&                            \
	   (vp)->v_object->resident_page_count == 0)))
/*
 * Non-zero when vp has VFREE or VTBFREE set in v_flag while v_holdcnt
 * or v_usecount is non-zero.  The argument is evaluated more than once.
 */
#define VSHOULDBUSY(vp)                                                 \
	(((vp)->v_flag & (VFREE | VTBFREE)) != 0 &&                     \
	 ((vp)->v_holdcnt != 0 || (vp)->v_usecount != 0))
307 * Mods for extensibility.
311 * Flags for vdesc_flags:
#define VDESC_MAX_VPS           16

/*
 * Low order 16 flag bits are reserved for willrele flags for vp
 * arguments.
 *
 * NOTE(review): VDESC_NOMAP_VPP (0x0100) and VDESC_VPP_WILLRELE (0x0200)
 * are bits 8 and 9, i.e. inside that low 16-bit range -- confirm that
 * the overlap is intended before adding more per-vp willrele bits.
 */
#define VDESC_VP0_WILLRELE      0x0001
#define VDESC_VP1_WILLRELE      0x0002
#define VDESC_VP2_WILLRELE      0x0004
#define VDESC_VP3_WILLRELE      0x0008
#define VDESC_NOMAP_VPP         0x0100
#define VDESC_VPP_WILLRELE      0x0200
323 * VDESC_NO_OFFSET is used to identify the end of the offset list
324 * and in places where no such field exists.
/* Parenthesized so the negative constant always expands as one operand. */
#define VDESC_NO_OFFSET         (-1)
329 * This structure describes the vnode operation taking place.
331 struct vnodeop_desc {
332 int vdesc_offset; /* offset in vector--first for speed */
333 char *vdesc_name; /* a readable name for debugging */
334 int vdesc_flags; /* VDESC_* flags */
337 * These ops are used by bypass routines to map and locate arguments.
338 * Creds and procs are not needed in bypass routines, but sometimes
339 * they are useful to (for example) transport layers.
340 * Nameidata is useful because it has a cred in it.
342 int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */
343 int vdesc_vpp_offset; /* return vpp location */
344 int vdesc_cred_offset; /* cred location, if any */
345 int vdesc_proc_offset; /* proc location, if any */
346 int vdesc_componentname_offset; /* if any */
348 * Finally, we've got a list of private data (about each operation)
349 * for each transport layer. (Support to manage this list is not
352 caddr_t *vdesc_transports;
357 * A list of all the operation descs.
359 extern struct vnodeop_desc *vnodeop_descs[];
362 * Interlock for scanning list of vnodes attached to a mountpoint
364 extern struct simplelock mntvnode_slock;
367 * This macro is very helpful in defining those offsets in the vdesc struct.
369 * This is stolen from X11R4. I ignored all the fancy stuff for
370 * Crays, so if you decide to port this to such a serious machine,
371 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
/*
 * VOPARG_OFFSET(ptr_type, member)
 *	Byte distance, as an int, between a null pointer of type ptr_type
 *	and the address of member reached through it.
 * VOPARG_OFFSETOF(struct_type, member)
 *	Same, but takes the structure type instead of a pointer type.
 * VOPARG_OFFSETTO(result_type, offset, base)
 *	Adds offset bytes to base (viewed as char *) and casts the
 *	resulting address to result_type.
 */
#define VOPARG_OFFSET(ptr_type, member)                                 \
	((int) (((char *) (&(((ptr_type)NULL)->member))) - ((char *) NULL)))
#define VOPARG_OFFSETOF(struct_type, member)                            \
	VOPARG_OFFSET(struct_type*, member)
#define VOPARG_OFFSETTO(result_type, offset, base)                      \
	((result_type)(((char*)(base)) + (offset)))
382 * This structure is used to configure the new vnodeops vector.
384 struct vnodeopv_entry_desc {
385 struct vnodeop_desc *opve_op; /* which operation this is */
386 vop_t *opve_impl; /* code implementing this operation */
388 struct vnodeopv_desc {
389 /* ptr to the ptr to the vector where op should go */
390 vop_t ***opv_desc_vector_p;
391 struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */
395 * A generic structure.
396 * This can be used by bypass routines to identify generic arguments.
398 struct vop_generic_args {
399 struct vnodeop_desc *a_desc;
400 /* other random data follows, presumably */
404 #ifdef DEBUG_VFS_LOCKS
406 * Macros to aid in tracing VFS locking problems. Not totally
407 * reliable since if the process sleeps between changing the lock
408 * state and checking it with the assert, some other process could
409 * change the state. They are good enough for debugging a single
410 * filesystem using a single-threaded test. I find that 'cvs co src'
411 * is a pretty good test.
415 * [dfr] Kludge until I get around to fixing all the vfs locking.
/*
 * Non-zero when vp->v_tag is one of the tags listed below.  The
 * argument is evaluated once per comparison until one matches.
 */
#define IS_LOCKING_VFS(vp)                                              \
	((vp)->v_tag == VT_UFS ||                                       \
	 (vp)->v_tag == VT_MFS ||                                       \
	 (vp)->v_tag == VT_NFS ||                                       \
	 (vp)->v_tag == VT_LFS ||                                       \
	 (vp)->v_tag == VT_ISOFS ||                                     \
	 (vp)->v_tag == VT_MSDOSFS ||                                   \
	 (vp)->v_tag == VT_DEVFS)
425 #define ASSERT_VOP_LOCKED(vp, str) \
426 if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp)) { \
427 panic("%s: %p is not locked but should be", str, vp); \
430 #define ASSERT_VOP_UNLOCKED(vp, str) \
431 if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp)) { \
432 panic("%s: %p is locked but shouldn't be", str, vp); \
437 #define ASSERT_VOP_LOCKED(vp, str)
438 #define ASSERT_VOP_UNLOCKED(vp, str)
443 * VOCALL calls an op given an ops vector. We break it out because BSD's
444 * vclean changes the ops vector and then wants to call ops with the old
/* Call entry `off` of the operations vector `opsv`, passing `ap`. */
#define VOCALL(opsv, off, ap)   (((opsv)[(off)])(ap))
450 * This call works for vnodes in the kernel.
/*
 * VCALL:   VOCALL through the v_op vector of the given vnode.
 * VDESC:   address of the object named <op>_desc.
 * VOFFSET: the vdesc_offset member of that object.
 */
#define VCALL(vp, off, ap)      VOCALL((vp)->v_op, (off), (ap))
#define VDESC(op)               (& __CONCAT(op,_desc))
#define VOFFSET(op)             (VDESC(op)->vdesc_offset)
457 * VMIO support inline
460 extern int vmiodirenable;
463 vn_canvmio(struct vnode *vp)
465 if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR)))
471 * Finally, include the default set of vnode operations.
473 #include "vnode_if.h"
476 * Public vnode manipulation functions.
478 struct componentname;
490 struct vop_bwrite_args;
492 extern int (*lease_check_hook) __P((struct vop_lease_args *));
494 void addalias __P((struct vnode *vp, dev_t nvp_rdev));
495 void addaliasu __P((struct vnode *vp, udev_t nvp_rdev));
496 int bdevvp __P((dev_t dev, struct vnode **vpp));
497 /* cache_* may belong in namei.h. */
498 void cache_enter __P((struct vnode *dvp, struct vnode *vp,
499 struct componentname *cnp));
500 int cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
501 struct componentname *cnp));
502 void cache_purge __P((struct vnode *vp));
503 void cache_purgevfs __P((struct mount *mp));
504 void cvtstat __P((struct stat *st, struct ostat *ost));
505 void cvtnstat __P((struct stat *sb, struct nstat *nsb));
506 int getnewvnode __P((enum vtagtype tag,
507 struct mount *mp, vop_t **vops, struct vnode **vpp));
508 int lease_check __P((struct vop_lease_args *ap));
509 int spec_vnoperate __P((struct vop_generic_args *));
510 int speedup_syncer __P((void));
511 void vattr_null __P((struct vattr *vap));
512 int vcount __P((struct vnode *vp));
513 void vdrop __P((struct vnode *));
514 int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp));
515 void vfs_add_vnodeops __P((const void *));
516 void vfs_rm_vnodeops __P((const void *));
517 int vflush __P((struct mount *mp, struct vnode *skipvp, int flags));
518 int vget __P((struct vnode *vp, int lockflag, struct proc *p));
519 void vgone __P((struct vnode *vp));
520 void vhold __P((struct vnode *));
521 int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
523 struct proc *p, int slpflag, int slptimeo));
524 int vtruncbuf __P((struct vnode *vp, struct ucred *cred, struct proc *p,
525 off_t length, int blksize));
526 void vprint __P((char *label, struct vnode *vp));
527 int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
529 int vn_close __P((struct vnode *vp,
530 int flags, struct ucred *cred, struct proc *p));
531 int vn_isdisk __P((struct vnode *vp));
532 int vn_lock __P((struct vnode *vp, int flags, struct proc *p));
534 int debug_vn_lock __P((struct vnode *vp, int flags, struct proc *p,
535 const char *filename, int line));
536 #define vn_lock(vp,flags,p) debug_vn_lock(vp,flags,p,__FILE__,__LINE__)
538 int vn_open __P((struct nameidata *ndp, int fmode, int cmode));
539 void vn_pollevent __P((struct vnode *vp, int events));
540 void vn_pollgone __P((struct vnode *vp));
541 int vn_pollrecord __P((struct vnode *vp, struct proc *p, int events));
542 int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
543 int len, off_t offset, enum uio_seg segflg, int ioflg,
544 struct ucred *cred, int *aresid, struct proc *p));
545 int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
546 dev_t vn_todev __P((struct vnode *vp));
547 int vfs_cache_lookup __P((struct vop_lookup_args *ap));
548 int vfs_object_create __P((struct vnode *vp, struct proc *p,
549 struct ucred *cred));
550 void vfs_timestamp __P((struct timespec *));
551 int vn_writechk __P((struct vnode *vp));
552 int vop_stdbwrite __P((struct vop_bwrite_args *ap));
553 int vop_stdislocked __P((struct vop_islocked_args *));
554 int vop_stdlock __P((struct vop_lock_args *));
555 int vop_stdunlock __P((struct vop_unlock_args *));
556 int vop_noislocked __P((struct vop_islocked_args *));
557 int vop_nolock __P((struct vop_lock_args *));
558 int vop_nopoll __P((struct vop_poll_args *));
559 int vop_nounlock __P((struct vop_unlock_args *));
560 int vop_stdpathconf __P((struct vop_pathconf_args *));
561 int vop_stdpoll __P((struct vop_poll_args *));
562 int vop_revoke __P((struct vop_revoke_args *));
563 int vop_sharedlock __P((struct vop_lock_args *));
564 int vop_eopnotsupp __P((struct vop_generic_args *ap));
565 int vop_ebadf __P((struct vop_generic_args *ap));
566 int vop_einval __P((struct vop_generic_args *ap));
567 int vop_enotty __P((struct vop_generic_args *ap));
568 int vop_defaultop __P((struct vop_generic_args *ap));
569 int vop_null __P((struct vop_generic_args *ap));
570 int vop_panic __P((struct vop_generic_args *ap));
572 void vput __P((struct vnode *vp));
573 void vrele __P((struct vnode *vp));
574 void vref __P((struct vnode *vp));
575 void vbusy __P((struct vnode *vp));
577 extern vop_t **default_vnodeop_p;
578 extern vop_t **spec_vnodeop_p;
580 extern TAILQ_HEAD(tobefreelist, vnode)
581 vnode_tobefree_list; /* vnode free list */
585 #endif /* !_SYS_VNODE_H_ */