2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
42 /* For 4.3 integer FS ID compatibility */
43 #include "opt_compat.h"
46 #include <sys/param.h>
47 #include <sys/systm.h>
50 #include <sys/sysent.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
53 #include <sys/mutex.h>
54 #include <sys/sysproto.h>
55 #include <sys/namei.h>
56 #include <sys/filedesc.h>
57 #include <sys/kernel.h>
58 #include <sys/fcntl.h>
60 #include <sys/linker.h>
63 #include <sys/unistd.h>
64 #include <sys/vnode.h>
66 #include <sys/dirent.h>
67 #include <sys/extattr.h>
69 #include <sys/sysctl.h>
71 #include <machine/limits.h>
72 #include <machine/stdarg.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_page.h>
79 static int change_dir(struct nameidata *ndp, struct thread *td);
80 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
81 static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82 static int getutimes(const struct timeval *, struct timespec *);
83 static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84 static int setfmode(struct thread *td, struct vnode *, int);
85 static int setfflags(struct thread *td, struct vnode *, int);
86 static int setutimes(struct thread *td, struct vnode *,
87 const struct timespec *, int);
88 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
90 static void vfs_freeopts(struct vfsoptlist *opt);
91 static int vfs_nmount(struct thread *td, int, struct uio *);
93 static int usermount = 0; /* if 1, non-root can mount fs. */
95 int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
100 * Virtual File System System Calls
103 #ifndef _SYS_SYSPROTO_H_
114 struct nmount_args /* {
115 syscallarg(struct iovec *) iovp;
116 syscallarg(unsigned int) iovcnt;
117 syscallarg(int) flags;
121 struct iovec *iov, *needfree;
122 struct iovec aiov[UIO_SMALLIOV];
124 u_int iovlen, iovcnt;
126 iovcnt = SCARG(uap, iovcnt);
127 iovlen = iovcnt * sizeof (struct iovec);
129 * Check that we have an even number of iovec's
130 * and that we have at least two options.
132 if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
135 if (iovcnt > UIO_SMALLIOV) {
136 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
143 auio.uio_iovcnt = iovcnt;
144 auio.uio_rw = UIO_WRITE;
145 auio.uio_segflg = UIO_USERSPACE;
149 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
151 for (i = 0; i < iovcnt; i++) {
152 if (iov->iov_len > INT_MAX - auio.uio_resid) {
156 auio.uio_resid += iov->iov_len;
159 error = vfs_nmount(td, SCARG(uap, flags), &auio);
/*
 * Release the heap-allocated iovec array under the same malloc type it
 * was allocated with (M_IOV in the MALLOC() call earlier in this
 * function); freeing it as M_TEMP mismatches the per-type accounting.
 * Presumably needfree aliases that MALLOC()ed array -- confirm.
 */
161 if (needfree != NULL)
162 free(needfree, M_IOV);
167 * Release all resources related to the
171 vfs_freeopts(struct vfsoptlist *opt)
173 free(opt->opt, M_MOUNT);
174 free(opt->optbuf, M_MOUNT);
179 kernel_mount(iovp, iovcnt, flags)
189 * Check that we have an even number of iovec's
190 * and that we have at least two options.
192 if ((iovcnt & 1) || (iovcnt < 4))
196 auio.uio_iovcnt = iovcnt;
197 auio.uio_rw = UIO_WRITE;
198 auio.uio_segflg = UIO_SYSSPACE;
203 for (i = 0; i < iovcnt; i++) {
204 if (iov->iov_len > INT_MAX - auio.uio_resid) {
207 auio.uio_resid += iov->iov_len;
211 error = vfs_nmount(curthread, flags, &auio);
216 kernel_vmount(int flags, ...)
218 struct iovec *iovp, *iov;
221 unsigned int iovcnt, iovlen, len, optcnt;
223 char *sep, *buf, *pos;
228 for (optcnt = 0; (opt = va_arg(ap, const char *)) != NULL; optcnt++)
229 len += strlen(opt) + 1;
235 iovcnt = optcnt << 1;
236 iovlen = iovcnt * sizeof (struct iovec);
237 MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
238 MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
241 for (i = 0; i < optcnt; i++) {
242 opt = va_arg(ap, const char *);
244 sep = index(pos, '=');
253 iov->iov_len = sep - pos + 1;
257 iovlen = strlen(pos) + 1;
258 iov->iov_len = iovlen;
264 auio.uio_iovcnt = iovcnt;
265 auio.uio_rw = UIO_WRITE;
266 auio.uio_segflg = UIO_SYSSPACE;
269 auio.uio_resid = len;
271 error = vfs_nmount(curthread, flags, &auio);
278 * vfs_nmount(): actually attempt a filesystem mount.
281 vfs_nmount(td, fsflags, fsoptions)
283 int fsflags; /* Flags common to all filesystems. */
284 struct uio *fsoptions; /* Options local to the filesystem. */
289 struct vfsconf *vfsp;
291 struct vfsoptlist *optlist;
293 char *buf, *fstype, *fspath;
294 int error, flag = 0, kern_flag = 0, i, len, optcnt;
295 int offset, iovcnt, fstypelen, fspathlen;
300 * Allocate memory to hold the vfsopt structures.
302 iovcnt = fsoptions->uio_iovcnt;
303 optcnt = iovcnt >> 1;
304 opt = malloc(sizeof (struct vfsopt) * optcnt,
305 M_MOUNT, M_WAITOK | M_ZERO);
308 * Count the size of the buffer for options,
309 * allocate it, and fill in the vfsopt structures.
311 cur = fsoptions->uio_iov;
312 len = fsoptions->uio_resid;
/*
 * Allocate the option buffer as M_MOUNT: it is stored in optlist->optbuf
 * and vfs_freeopts() releases optbuf with M_MOUNT, so the types must match.
 */
313 buf = malloc(len, M_MOUNT, M_WAITOK | M_ZERO);
315 optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK);
317 optlist->optbuf = buf;
318 optlist->optcnt = optcnt;
321 cur = fsoptions->uio_iov;
323 opt[i].name = buf + offset;
324 /* Ensure the name of an option is a string. */
/*
 * NOTE(review): buf is allocated with M_ZERO and the option bytes appear
 * to be copied in only by the later uiomove(), so this test may always
 * see '\0'; a zero iov_len would also index one byte before the name.
 * Confirm, and consider validating after the copy.
 */
325 if (opt[i].name[cur->iov_len - 1] != '\0') {
329 offset += cur->iov_len;
331 opt[i].len = cur->iov_len;
333 * Prevent consumers from trying to
334 * read the value of a 0 length option
335 * by setting it to NULL.
340 opt[i].value = buf + offset;
341 offset += cur->iov_len;
345 if ((error = uiomove(buf, len, fsoptions)) != 0)
349 * We need these two options before the others,
350 * and they are mandatory for any filesystem.
351 * Ensure they are NUL-terminated as well.
354 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
355 if (error || fstype[fstypelen - 1] != '\0') {
360 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
361 if (error || fspath[fspathlen - 1] != '\0') {
367 * Be ultra-paranoid about making sure the type and fspath
368 * variables will fit in our mp buffers, including the
371 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
372 error = ENAMETOOLONG;
376 if (usermount == 0) {
377 error = suser_td(td);
382 * Do not allow NFS export by non-root users.
384 if (fsflags & MNT_EXPORTED) {
385 error = suser_td(td);
390 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
392 if (suser_xxx(td->td_ucred, NULL, 0) != 0)
393 fsflags |= MNT_NOSUID | MNT_NODEV;
395 * Get vnode to be covered
397 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
398 if ((error = namei(&nd)) != 0)
400 NDFREE(&nd, NDF_ONLY_PNBUF);
402 if (fsflags & MNT_UPDATE) {
403 if ((vp->v_flag & VROOT) == 0) {
410 kern_flag = mp->mnt_kern_flag;
412 * We only allow the filesystem to be reloaded if it
413 * is currently mounted read-only.
415 if ((fsflags & MNT_RELOAD) &&
416 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
418 error = EOPNOTSUPP; /* Needs translation */
422 * Only root, or the user that did the original mount is
423 * permitted to update it.
425 if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
426 error = suser_td(td);
432 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
437 mtx_lock(&vp->v_interlock);
438 if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
439 mtx_unlock(&vp->v_interlock);
445 vp->v_flag |= VMOUNT;
446 mtx_unlock(&vp->v_interlock);
447 mp->mnt_flag |= fsflags &
448 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
449 VOP_UNLOCK(vp, 0, td);
450 mp->mnt_optnew = optlist;
454 * If the user is not root, ensure that they own the directory
455 * onto which we are attempting to mount.
457 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
462 if (va.va_uid != td->td_ucred->cr_uid) {
463 error = suser_td(td);
469 if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
473 if (vp->v_type != VDIR) {
478 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
479 if (!strcmp(vfsp->vfc_name, fstype))
482 /* Only load modules for root (very important!). */
483 error = suser_td(td);
488 error = securelevel_gt(td->td_ucred, 0);
493 error = linker_load_file(fstype, &lf);
494 if (error || lf == NULL) {
501 /* Look up again to see if the VFS was loaded. */
502 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
503 if (!strcmp(vfsp->vfc_name, fstype))
507 linker_file_unload(lf);
513 mtx_lock(&vp->v_interlock);
514 if ((vp->v_flag & VMOUNT) != 0 ||
515 vp->v_mountedhere != NULL) {
516 mtx_unlock(&vp->v_interlock);
521 vp->v_flag |= VMOUNT;
522 mtx_unlock(&vp->v_interlock);
525 * Allocate and initialize the filesystem.
527 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
528 TAILQ_INIT(&mp->mnt_nvnodelist);
529 TAILQ_INIT(&mp->mnt_reservedvnlist);
530 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
531 (void)vfs_busy(mp, LK_NOWAIT, 0, td);
532 mp->mnt_op = vfsp->vfc_vfsops;
534 vfsp->vfc_refcount++;
535 mp->mnt_stat.f_type = vfsp->vfc_typenum;
536 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
537 strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
538 mp->mnt_vnodecovered = vp;
539 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
540 strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
541 mp->mnt_iosize_max = DFLTPHYS;
542 VOP_UNLOCK(vp, 0, td);
544 mp->mnt_opt = optlist;
547 * Check if the fs implements the new VFS_NMOUNT()
548 * function, since the new system call was used.
550 if (mp->mnt_op->vfs_mount != NULL) {
551 printf("%s doesn't support the new mount syscall\n",
552 mp->mnt_vfc->vfc_name);
559 * Set the mount level flags.
561 if (fsflags & MNT_RDONLY)
562 mp->mnt_flag |= MNT_RDONLY;
563 else if (mp->mnt_flag & MNT_RDONLY)
564 mp->mnt_kern_flag |= MNTK_WANTRDWR;
565 mp->mnt_flag &=~ MNT_UPDATEMASK;
566 mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
568 * Mount the filesystem.
569 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
570 * get. No freeing of cn_pnbuf.
572 error = VFS_NMOUNT(mp, &nd, td);
573 if (mp->mnt_flag & MNT_UPDATE) {
574 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
575 mp->mnt_flag &= ~MNT_RDONLY;
577 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
578 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
581 mp->mnt_kern_flag = kern_flag;
582 vfs_freeopts(mp->mnt_optnew);
584 vfs_freeopts(mp->mnt_opt);
585 mp->mnt_opt = mp->mnt_optnew;
587 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
588 if (mp->mnt_syncer == NULL)
589 error = vfs_allocate_syncvnode(mp);
591 if (mp->mnt_syncer != NULL)
592 vrele(mp->mnt_syncer);
593 mp->mnt_syncer = NULL;
596 mtx_lock(&vp->v_interlock);
597 vp->v_flag &= ~VMOUNT;
598 mtx_unlock(&vp->v_interlock);
602 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
604 * Put the new filesystem on the mount list after root.
610 mtx_lock(&vp->v_interlock);
611 vp->v_flag &= ~VMOUNT;
612 vp->v_mountedhere = mp;
613 mtx_unlock(&vp->v_interlock);
614 mtx_lock(&mountlist_mtx);
615 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
616 mtx_unlock(&mountlist_mtx);
617 if (VFS_ROOT(mp, &newdp))
618 panic("mount: lost mount");
619 checkdirs(vp, newdp);
621 VOP_UNLOCK(vp, 0, td);
622 if ((mp->mnt_flag & MNT_RDONLY) == 0)
623 error = vfs_allocate_syncvnode(mp);
625 if ((error = VFS_START(mp, 0, td)) != 0) {
630 mtx_lock(&vp->v_interlock);
631 vp->v_flag &= ~VMOUNT;
632 mtx_unlock(&vp->v_interlock);
633 mp->mnt_vfc->vfc_refcount--;
635 free((caddr_t)mp, M_MOUNT);
641 vfs_freeopts(optlist);
648 #ifndef _SYS_SYSPROTO_H_
660 struct mount_args /* {
661 syscallarg(char *) type;
662 syscallarg(char *) path;
663 syscallarg(int) flags;
664 syscallarg(caddr_t) data;
671 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
672 fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
675 * vfs_mount() actually takes a kernel string for `type' and
676 * `path' now, so extract them.
678 error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
681 error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
684 error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
687 free(fstype, M_TEMP);
688 free(fspath, M_TEMP);
693 * vfs_mount(): actually attempt a filesystem mount.
695 * This routine is designed to be a "generic" entry point for routines
696 * that wish to mount a filesystem. All parameters except `fsdata' are
697 * pointers into kernel space. `fsdata' is currently still a pointer
701 vfs_mount(td, fstype, fspath, fsflags, fsdata)
711 struct vfsconf *vfsp;
712 int error, flag = 0, kern_flag = 0;
717 * Be ultra-paranoid about making sure the type and fspath
718 * variables will fit in our mp buffers, including the
721 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
722 return (ENAMETOOLONG);
724 if (usermount == 0) {
725 error = suser_td(td);
730 * Do not allow NFS export by non-root users.
732 if (fsflags & MNT_EXPORTED) {
733 error = suser_td(td);
738 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
740 if (suser_xxx(td->td_ucred, NULL, 0) != 0)
741 fsflags |= MNT_NOSUID | MNT_NODEV;
743 * Get vnode to be covered
745 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
746 if ((error = namei(&nd)) != 0)
748 NDFREE(&nd, NDF_ONLY_PNBUF);
750 if (fsflags & MNT_UPDATE) {
751 if ((vp->v_flag & VROOT) == 0) {
757 kern_flag = mp->mnt_kern_flag;
759 * We only allow the filesystem to be reloaded if it
760 * is currently mounted read-only.
762 if ((fsflags & MNT_RELOAD) &&
763 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
765 return (EOPNOTSUPP); /* Needs translation */
768 * Only root, or the user that did the original mount is
769 * permitted to update it.
771 if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
772 error = suser_td(td);
778 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
782 mtx_lock(&vp->v_interlock);
783 if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
784 mtx_unlock(&vp->v_interlock);
789 vp->v_flag |= VMOUNT;
790 mtx_unlock(&vp->v_interlock);
791 mp->mnt_flag |= fsflags &
792 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
793 VOP_UNLOCK(vp, 0, td);
797 * If the user is not root, ensure that they own the directory
798 * onto which we are attempting to mount.
800 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
805 if (va.va_uid != td->td_ucred->cr_uid) {
806 error = suser_td(td);
812 if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
816 if (vp->v_type != VDIR) {
820 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
821 if (!strcmp(vfsp->vfc_name, fstype))
824 /* Only load modules for root (very important!). */
825 error = suser_td(td);
830 error = securelevel_gt(td->td_ucred, 0);
835 error = linker_load_file(fstype, &lf);
836 if (error || lf == NULL) {
843 /* Look up again to see if the VFS was loaded. */
844 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
845 if (!strcmp(vfsp->vfc_name, fstype))
849 linker_file_unload(lf);
854 mtx_lock(&vp->v_interlock);
855 if ((vp->v_flag & VMOUNT) != 0 ||
856 vp->v_mountedhere != NULL) {
857 mtx_unlock(&vp->v_interlock);
861 vp->v_flag |= VMOUNT;
862 mtx_unlock(&vp->v_interlock);
865 * Allocate and initialize the filesystem.
867 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
868 TAILQ_INIT(&mp->mnt_nvnodelist);
869 TAILQ_INIT(&mp->mnt_reservedvnlist);
870 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
871 (void)vfs_busy(mp, LK_NOWAIT, 0, td);
872 mp->mnt_op = vfsp->vfc_vfsops;
874 vfsp->vfc_refcount++;
875 mp->mnt_stat.f_type = vfsp->vfc_typenum;
876 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
877 strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
878 mp->mnt_vnodecovered = vp;
879 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
880 strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
881 mp->mnt_iosize_max = DFLTPHYS;
882 VOP_UNLOCK(vp, 0, td);
885 * Check if the fs implements the old VFS_MOUNT()
886 * function, since the old system call was used.
888 if (mp->mnt_op->vfs_mount == NULL) {
889 printf("%s doesn't support the old mount syscall\n",
890 mp->mnt_vfc->vfc_name);
896 * Set the mount level flags.
898 if (fsflags & MNT_RDONLY)
899 mp->mnt_flag |= MNT_RDONLY;
900 else if (mp->mnt_flag & MNT_RDONLY)
901 mp->mnt_kern_flag |= MNTK_WANTRDWR;
902 mp->mnt_flag &=~ MNT_UPDATEMASK;
903 mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
905 * Mount the filesystem.
906 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
907 * get. No freeing of cn_pnbuf.
909 error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
910 if (mp->mnt_flag & MNT_UPDATE) {
911 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
912 mp->mnt_flag &= ~MNT_RDONLY;
914 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
915 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
918 mp->mnt_kern_flag = kern_flag;
920 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
921 if (mp->mnt_syncer == NULL)
922 error = vfs_allocate_syncvnode(mp);
924 if (mp->mnt_syncer != NULL)
925 vrele(mp->mnt_syncer);
926 mp->mnt_syncer = NULL;
929 mtx_lock(&vp->v_interlock);
930 vp->v_flag &= ~VMOUNT;
931 mtx_unlock(&vp->v_interlock);
935 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
937 * Put the new filesystem on the mount list after root.
943 mtx_lock(&vp->v_interlock);
944 vp->v_flag &= ~VMOUNT;
945 vp->v_mountedhere = mp;
946 mtx_unlock(&vp->v_interlock);
947 mtx_lock(&mountlist_mtx);
948 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
949 mtx_unlock(&mountlist_mtx);
950 if (VFS_ROOT(mp, &newdp))
951 panic("mount: lost mount");
952 checkdirs(vp, newdp);
954 VOP_UNLOCK(vp, 0, td);
955 if ((mp->mnt_flag & MNT_RDONLY) == 0)
956 error = vfs_allocate_syncvnode(mp);
958 if ((error = VFS_START(mp, 0, td)) != 0)
961 mtx_lock(&vp->v_interlock);
962 vp->v_flag &= ~VMOUNT;
963 mtx_unlock(&vp->v_interlock);
964 mp->mnt_vfc->vfc_refcount--;
966 free((caddr_t)mp, M_MOUNT);
973 * Scan all active processes to see if any of them have a current
974 * or root directory of `olddp'. If so, replace them with the new
978 checkdirs(olddp, newdp)
979 struct vnode *olddp, *newdp;
981 struct filedesc *fdp;
985 if (olddp->v_usecount == 1)
987 sx_slock(&allproc_lock);
988 LIST_FOREACH(p, &allproc, p_list) {
997 if (fdp->fd_cdir == olddp) {
999 fdp->fd_cdir = newdp;
1002 if (fdp->fd_rdir == olddp) {
1004 fdp->fd_rdir = newdp;
1007 FILEDESC_UNLOCK(fdp);
1012 sx_sunlock(&allproc_lock);
1013 if (rootvnode == olddp) {
1021 * Unmount a file system.
1023 * Note: unmount takes a path to the vnode mounted on as argument,
1024 * not special file (as before).
1026 #ifndef _SYS_SYSPROTO_H_
1027 struct unmount_args {
1036 register struct unmount_args /* {
1037 syscallarg(char *) path;
1038 syscallarg(int) flags;
1041 register struct vnode *vp;
1044 struct nameidata nd;
1046 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1047 SCARG(uap, path), td);
1048 if ((error = namei(&nd)) != 0)
1051 NDFREE(&nd, NDF_ONLY_PNBUF);
1055 * Only root, or the user that did the original mount is
1056 * permitted to unmount this filesystem.
1058 if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1059 error = suser_td(td);
1067 * Don't allow unmounting the root file system.
1069 if (mp->mnt_flag & MNT_ROOTFS) {
1075 * Must be the root of the filesystem
1077 if ((vp->v_flag & VROOT) == 0) {
1082 return (dounmount(mp, SCARG(uap, flags), td));
1086 * Do the actual file system unmount.
1089 dounmount(mp, flags, td)
1094 struct vnode *coveredvp, *fsrootvp;
1098 mtx_lock(&mountlist_mtx);
1099 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1100 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1101 ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1103 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1104 if (mp->mnt_kern_flag & MNTK_MWAIT)
1105 wakeup((caddr_t)mp);
1108 vn_start_write(NULL, &mp, V_WAIT);
1110 if (mp->mnt_flag & MNT_EXPUBLIC)
1111 vfs_setpublicfs(NULL, NULL, NULL);
1113 vfs_msync(mp, MNT_WAIT);
1114 async_flag = mp->mnt_flag & MNT_ASYNC;
1115 mp->mnt_flag &=~ MNT_ASYNC;
1116 cache_purgevfs(mp); /* remove cache entries for this file sys */
1117 if (mp->mnt_syncer != NULL)
1118 vrele(mp->mnt_syncer);
1119 /* Move process cdir/rdir refs on fs root to underlying vnode. */
1120 if (VFS_ROOT(mp, &fsrootvp) == 0) {
1121 if (mp->mnt_vnodecovered != NULL)
1122 checkdirs(fsrootvp, mp->mnt_vnodecovered);
1123 if (fsrootvp == rootvnode) {
1129 if (((mp->mnt_flag & MNT_RDONLY) ||
1130 (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1131 (flags & MNT_FORCE)) {
1132 error = VFS_UNMOUNT(mp, flags, td);
1134 vn_finished_write(mp);
1136 /* Undo cdir/rdir and rootvnode changes made above. */
1137 if (VFS_ROOT(mp, &fsrootvp) == 0) {
1138 if (mp->mnt_vnodecovered != NULL)
1139 checkdirs(mp->mnt_vnodecovered, fsrootvp);
1140 if (rootvnode == NULL) {
1141 rootvnode = fsrootvp;
1146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1147 (void) vfs_allocate_syncvnode(mp);
1148 mtx_lock(&mountlist_mtx);
1149 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1150 mp->mnt_flag |= async_flag;
1151 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1152 &mountlist_mtx, td);
1153 if (mp->mnt_kern_flag & MNTK_MWAIT)
1154 wakeup((caddr_t)mp);
1157 mtx_lock(&mountlist_mtx);
1158 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1159 if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1160 coveredvp->v_mountedhere = NULL;
1161 mp->mnt_vfc->vfc_refcount--;
1162 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1163 panic("unmount: dangling vnode");
1164 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1165 lockdestroy(&mp->mnt_lock);
1166 if (coveredvp != NULL)
1168 if (mp->mnt_kern_flag & MNTK_MWAIT)
1169 wakeup((caddr_t)mp);
1170 if (mp->mnt_op->vfs_mount == NULL)
1171 vfs_freeopts(mp->mnt_opt);
1172 free((caddr_t)mp, M_MOUNT);
1177 * Sync each mounted filesystem.
1179 #ifndef _SYS_SYSPROTO_H_
1186 static int syncprt = 0;
1187 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1194 struct sync_args *uap;
1196 struct mount *mp, *nmp;
1199 mtx_lock(&mountlist_mtx);
1200 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1201 if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1202 nmp = TAILQ_NEXT(mp, mnt_list);
1205 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
1206 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
1207 asyncflag = mp->mnt_flag & MNT_ASYNC;
1208 mp->mnt_flag &= ~MNT_ASYNC;
1209 vfs_msync(mp, MNT_NOWAIT);
1210 VFS_SYNC(mp, MNT_NOWAIT,
1211 ((td != NULL) ? td->td_ucred : NOCRED), td);
1212 mp->mnt_flag |= asyncflag;
1213 vn_finished_write(mp);
1215 mtx_lock(&mountlist_mtx);
1216 nmp = TAILQ_NEXT(mp, mnt_list);
1219 mtx_unlock(&mountlist_mtx);
1222 * XXX don't call vfs_bufstats() yet because that routine
1223 * was not imported in the Lite2 merge.
1228 #endif /* DIAGNOSTIC */
1233 /* XXX PRISON: could be per prison flag */
1234 static int prison_quotas;
1236 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1240 * Change filesystem quotas.
1242 #ifndef _SYS_SYSPROTO_H_
1243 struct quotactl_args {
1254 register struct quotactl_args /* {
1255 syscallarg(char *) path;
1256 syscallarg(int) cmd;
1257 syscallarg(int) uid;
1258 syscallarg(caddr_t) arg;
1263 struct nameidata nd;
1265 if (jailed(td->td_ucred) && !prison_quotas)
1267 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1268 if ((error = namei(&nd)) != 0)
1270 NDFREE(&nd, NDF_ONLY_PNBUF);
1271 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
1275 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
1276 SCARG(uap, arg), td);
1277 vn_finished_write(mp);
1282 * Get filesystem statistics.
1284 #ifndef _SYS_SYSPROTO_H_
1285 struct statfs_args {
1294 register struct statfs_args /* {
1295 syscallarg(char *) path;
1296 syscallarg(struct statfs *) buf;
1299 register struct mount *mp;
1300 register struct statfs *sp;
1302 struct nameidata nd;
1305 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1306 if ((error = namei(&nd)) != 0)
1308 mp = nd.ni_vp->v_mount;
1310 NDFREE(&nd, NDF_ONLY_PNBUF);
1312 error = VFS_STATFS(mp, sp, td);
1315 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1316 if (suser_xxx(td->td_ucred, 0, 0)) {
1317 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1318 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1321 return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1325 * Get filesystem statistics.
1327 #ifndef _SYS_SYSPROTO_H_
1328 struct fstatfs_args {
1337 register struct fstatfs_args /* {
1339 syscallarg(struct statfs *) buf;
1344 register struct statfs *sp;
1348 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1350 mp = ((struct vnode *)fp->f_data)->v_mount;
1355 error = VFS_STATFS(mp, sp, td);
1358 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1359 if (suser_xxx(td->td_ucred, 0, 0)) {
1360 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1361 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1364 return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1368 * Get statistics on all filesystems.
1370 #ifndef _SYS_SYSPROTO_H_
1371 struct getfsstat_args {
1380 register struct getfsstat_args /* {
1381 syscallarg(struct statfs *) buf;
1382 syscallarg(long) bufsize;
1383 syscallarg(int) flags;
1386 register struct mount *mp, *nmp;
1387 register struct statfs *sp;
1389 long count, maxcount, error;
1391 maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
1392 sfsp = (caddr_t)SCARG(uap, buf);
1394 mtx_lock(&mountlist_mtx);
1395 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1396 if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1397 nmp = TAILQ_NEXT(mp, mnt_list);
1400 if (sfsp && count < maxcount) {
1403 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1404 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1405 * overrides MNT_WAIT.
1407 if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1408 (SCARG(uap, flags) & MNT_WAIT)) &&
1409 (error = VFS_STATFS(mp, sp, td))) {
1410 mtx_lock(&mountlist_mtx);
1411 nmp = TAILQ_NEXT(mp, mnt_list);
1415 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1416 error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
1421 sfsp += sizeof(*sp);
1424 mtx_lock(&mountlist_mtx);
1425 nmp = TAILQ_NEXT(mp, mnt_list);
1428 mtx_unlock(&mountlist_mtx);
1429 if (sfsp && count > maxcount)
1430 td->td_retval[0] = maxcount;
1432 td->td_retval[0] = count;
1437 * Change current working directory to a given file descriptor.
1439 #ifndef _SYS_SYSPROTO_H_
1440 struct fchdir_args {
1448 struct fchdir_args /* {
1452 register struct filedesc *fdp = td->td_proc->p_fd;
1453 struct vnode *vp, *tdp, *vpold;
1458 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1460 vp = (struct vnode *)fp->f_data;
1463 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1464 if (vp->v_type != VDIR)
1467 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1468 while (!error && (mp = vp->v_mountedhere) != NULL) {
1469 if (vfs_busy(mp, 0, 0, td))
1471 error = VFS_ROOT(mp, &tdp);
1482 VOP_UNLOCK(vp, 0, td);
1484 vpold = fdp->fd_cdir;
1486 FILEDESC_UNLOCK(fdp);
1492 * Change current working directory (``.'').
1494 #ifndef _SYS_SYSPROTO_H_
1503 struct chdir_args /* {
1504 syscallarg(char *) path;
1507 register struct filedesc *fdp = td->td_proc->p_fd;
1509 struct nameidata nd;
1512 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1513 SCARG(uap, path), td);
1514 if ((error = change_dir(&nd, td)) != 0)
1516 NDFREE(&nd, NDF_ONLY_PNBUF);
1519 fdp->fd_cdir = nd.ni_vp;
1520 FILEDESC_UNLOCK(fdp);
1526 * Helper function for raised chroot(2) security function: Refuse if
1527 * any file descriptors are open directories.
1530 chroot_refuse_vdir_fds(fdp)
1531 struct filedesc *fdp;
1538 for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1539 fp = fget_locked(fdp, fd);
1542 if (fp->f_type == DTYPE_VNODE) {
1543 vp = (struct vnode *)fp->f_data;
1544 if (vp->v_type == VDIR) {
1545 FILEDESC_UNLOCK(fdp);
1550 FILEDESC_UNLOCK(fdp);
1555 * This sysctl determines if we will allow a process to chroot(2) if it
1556 * has a directory open:
1557 * 0: disallowed for all processes.
1558 * 1: allowed for processes that were not already chroot(2)'ed.
1559 * 2: allowed for all processes.
1562 static int chroot_allow_open_directories = 1;
1564 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1565 &chroot_allow_open_directories, 0, "");
1568 * Change notion of root (``/'') directory.
1570 #ifndef _SYS_SYSPROTO_H_
1571 struct chroot_args {
1579 struct chroot_args /* {
1580 syscallarg(char *) path;
1583 register struct filedesc *fdp = td->td_proc->p_fd;
1585 struct nameidata nd;
1588 error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1592 if (chroot_allow_open_directories == 0 ||
1593 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1594 FILEDESC_UNLOCK(fdp);
1595 error = chroot_refuse_vdir_fds(fdp);
1597 FILEDESC_UNLOCK(fdp);
1600 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1601 SCARG(uap, path), td);
1602 if ((error = change_dir(&nd, td)) != 0)
1604 NDFREE(&nd, NDF_ONLY_PNBUF);
1607 fdp->fd_rdir = nd.ni_vp;
1608 if (!fdp->fd_jdir) {
1609 fdp->fd_jdir = nd.ni_vp;
1612 FILEDESC_UNLOCK(fdp);
1618 * Common routine for chroot and chdir.
1622 register struct nameidata *ndp;
1632 if (vp->v_type != VDIR)
1635 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1639 VOP_UNLOCK(vp, 0, td);
1644 * Check permissions, allocate an open file structure,
1645 * and call the device open routine if any.
1647 #ifndef _SYS_SYSPROTO_H_
1657 register struct open_args /* {
1658 syscallarg(char *) path;
1659 syscallarg(int) flags;
1660 syscallarg(int) mode;
1663 struct proc *p = td->td_proc;
1664 struct filedesc *fdp = p->p_fd;
1669 int cmode, flags, oflags;
1671 int type, indx, error;
1673 struct nameidata nd;
1675 oflags = SCARG(uap, flags);
1676 if ((oflags & O_ACCMODE) == O_ACCMODE)
1678 flags = FFLAGS(oflags);
1679 error = falloc(td, &nfp, &indx);
1684 cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1685 FILEDESC_UNLOCK(fdp);
1686 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1687 td->td_dupfd = -indx - 1; /* XXX check for fdopen */
1689 * Bump the ref count to prevent another process from closing
1690 * the descriptor while we are blocked in vn_open()
1693 error = vn_open(&nd, &flags, cmode);
1696 * release our own reference
1701 * handle special fdopen() case. bleh. dupfdopen() is
1702 * responsible for dropping the old contents of ofiles[indx]
1705 if ((error == ENODEV || error == ENXIO) &&
1706 td->td_dupfd >= 0 && /* XXX from fdopen */
1708 dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1709 td->td_retval[0] = indx;
1713 * Clean up the descriptor, but only if another thread hadn't
1714 * replaced or closed it.
1717 if (fdp->fd_ofiles[indx] == fp) {
1718 fdp->fd_ofiles[indx] = NULL;
1719 FILEDESC_UNLOCK(fdp);
1722 FILEDESC_UNLOCK(fdp);
1724 if (error == ERESTART)
1729 NDFREE(&nd, NDF_ONLY_PNBUF);
1733 * There should be 2 references on the file, one from the descriptor
1734 * table, and one for us.
1736 * Handle the case where someone closed the file (via its file
1737 * descriptor) while we were blocked. The end result should look
1738 * like opening the file succeeded but it was immediately closed.
1742 if (fp->f_count == 1) {
1743 KASSERT(fdp->fd_ofiles[indx] != fp,
1744 ("Open file descriptor lost all refs"));
1745 FILEDESC_UNLOCK(fdp);
1747 VOP_UNLOCK(vp, 0, td);
1748 vn_close(vp, flags & FMASK, fp->f_cred, td);
1750 td->td_retval[0] = indx;
1754 fp->f_data = (caddr_t)vp;
1755 fp->f_flag = flags & FMASK;
1757 fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1758 FILEDESC_UNLOCK(fdp);
1760 VOP_UNLOCK(vp, 0, td);
1761 if (flags & (O_EXLOCK | O_SHLOCK)) {
1762 lf.l_whence = SEEK_SET;
1765 if (flags & O_EXLOCK)
1766 lf.l_type = F_WRLCK;
1768 lf.l_type = F_RDLCK;
1770 if ((flags & FNONBLOCK) == 0)
1772 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1774 fp->f_flag |= FHASLOCK;
1776 if (flags & O_TRUNC) {
1777 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1779 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1782 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1783 error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1784 VOP_UNLOCK(vp, 0, td);
1785 vn_finished_write(mp);
1789 /* assert that vn_open created a backing object if one is needed */
1790 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1791 ("open: vmio vnode has no backing object after vn_open"));
1793 * Release our private reference, leaving the one associated with
1794 * the descriptor table intact.
1797 td->td_retval[0] = indx;
1801 if (fdp->fd_ofiles[indx] == fp) {
1802 fdp->fd_ofiles[indx] = NULL;
1803 FILEDESC_UNLOCK(fdp);
1806 FILEDESC_UNLOCK(fdp);
1814 #ifndef _SYS_SYSPROTO_H_
1815 struct ocreat_args {
1823 register struct ocreat_args /* {
1824 syscallarg(char *) path;
1825 syscallarg(int) mode;
1828 struct open_args /* {
1829 syscallarg(char *) path;
1830 syscallarg(int) flags;
1831 syscallarg(int) mode;
1834 SCARG(&nuap, path) = SCARG(uap, path);
1835 SCARG(&nuap, mode) = SCARG(uap, mode);
1836 SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1837 return (open(td, &nuap));
1839 #endif /* COMPAT_43 */
1842 * Create a special file.
1844 #ifndef _SYS_SYSPROTO_H_
1855 register struct mknod_args /* {
1856 syscallarg(char *) path;
1857 syscallarg(int) mode;
1858 syscallarg(int) dev;
1866 struct nameidata nd;
1868 switch (SCARG(uap, mode) & S_IFMT) {
1871 error = suser_td(td);
1874 error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1881 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1882 if ((error = namei(&nd)) != 0)
1890 FILEDESC_LOCK(td->td_proc->p_fd);
1891 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1892 FILEDESC_UNLOCK(td->td_proc->p_fd);
1893 vattr.va_rdev = SCARG(uap, dev);
1896 switch (SCARG(uap, mode) & S_IFMT) {
1897 case S_IFMT: /* used by badsect to flag bad sectors */
1898 vattr.va_type = VBAD;
1901 vattr.va_type = VCHR;
1904 vattr.va_type = VBLK;
1914 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1915 NDFREE(&nd, NDF_ONLY_PNBUF);
1917 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1922 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1924 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1926 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1927 &nd.ni_cnd, &vattr);
1932 NDFREE(&nd, NDF_ONLY_PNBUF);
1934 vn_finished_write(mp);
1935 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1936 ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1941 * Create a named pipe.
1943 #ifndef _SYS_SYSPROTO_H_
1944 struct mkfifo_args {
1953 register struct mkfifo_args /* {
1954 syscallarg(char *) path;
1955 syscallarg(int) mode;
1961 struct nameidata nd;
1965 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1966 if ((error = namei(&nd)) != 0)
1968 if (nd.ni_vp != NULL) {
1969 NDFREE(&nd, NDF_ONLY_PNBUF);
1974 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1975 NDFREE(&nd, NDF_ONLY_PNBUF);
1977 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1982 vattr.va_type = VFIFO;
1983 FILEDESC_LOCK(td->td_proc->p_fd);
1984 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1985 FILEDESC_UNLOCK(td->td_proc->p_fd);
1986 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1987 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1990 NDFREE(&nd, NDF_ONLY_PNBUF);
1992 vn_finished_write(mp);
1997 * Make a hard file link.
1999 #ifndef _SYS_SYSPROTO_H_
2009 register struct link_args /* {
2010 syscallarg(char *) path;
2011 syscallarg(char *) link;
2016 struct nameidata nd;
2020 NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
2021 if ((error = namei(&nd)) != 0)
2023 NDFREE(&nd, NDF_ONLY_PNBUF);
2025 if (vp->v_type == VDIR) {
2027 return (EPERM); /* POSIX */
2029 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2033 NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2034 if ((error = namei(&nd)) == 0) {
2035 if (nd.ni_vp != NULL) {
2039 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2040 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2041 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2043 NDFREE(&nd, NDF_ONLY_PNBUF);
2047 vn_finished_write(mp);
2048 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
2049 ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
2054 * Make a symbolic link.
2056 #ifndef _SYS_SYSPROTO_H_
2057 struct symlink_args {
2066 register struct symlink_args /* {
2067 syscallarg(char *) path;
2068 syscallarg(char *) link;
2075 struct nameidata nd;
2077 path = uma_zalloc(namei_zone, M_WAITOK);
2078 if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
2082 NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2083 if ((error = namei(&nd)) != 0)
2086 NDFREE(&nd, NDF_ONLY_PNBUF);
2092 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2093 NDFREE(&nd, NDF_ONLY_PNBUF);
2095 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2100 FILEDESC_LOCK(td->td_proc->p_fd);
2101 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
2102 FILEDESC_UNLOCK(td->td_proc->p_fd);
2103 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2104 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2105 NDFREE(&nd, NDF_ONLY_PNBUF);
2109 vn_finished_write(mp);
2110 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
2111 ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
2113 uma_zfree(namei_zone, path);
2118 * Delete a whiteout from the filesystem.
2124 register struct undelete_args /* {
2125 syscallarg(char *) path;
2130 struct nameidata nd;
2134 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2135 SCARG(uap, path), td);
2140 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2141 NDFREE(&nd, NDF_ONLY_PNBUF);
2147 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2148 NDFREE(&nd, NDF_ONLY_PNBUF);
2150 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2154 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2155 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
2156 NDFREE(&nd, NDF_ONLY_PNBUF);
2158 vn_finished_write(mp);
2159 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
2160 ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
2165 * Delete a name from the filesystem.
2167 #ifndef _SYS_SYSPROTO_H_
2168 struct unlink_args {
2176 struct unlink_args /* {
2177 syscallarg(char *) path;
2183 struct nameidata nd;
2187 NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
2188 if ((error = namei(&nd)) != 0)
2191 if (vp->v_type == VDIR)
2192 error = EPERM; /* POSIX */
2195 * The root of a mounted filesystem cannot be deleted.
2197 * XXX: can this only be a VDIR case?
2199 if (vp->v_flag & VROOT)
2202 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2203 NDFREE(&nd, NDF_ONLY_PNBUF);
2206 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2210 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2211 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2213 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2214 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
2216 NDFREE(&nd, NDF_ONLY_PNBUF);
2219 vn_finished_write(mp);
2220 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
2221 ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
2226 * Reposition read/write file offset.
2228 #ifndef _SYS_SYSPROTO_H_
2239 register struct lseek_args /* {
2241 syscallarg(int) pad;
2242 syscallarg(off_t) offset;
2243 syscallarg(int) whence;
2246 struct ucred *cred = td->td_ucred;
2253 if ((error = fget(td, uap->fd, &fp)) != 0)
2255 if (fp->f_type != DTYPE_VNODE) {
2259 vp = (struct vnode *)fp->f_data;
2260 noneg = (vp->v_type != VCHR);
2261 offset = SCARG(uap, offset);
2262 switch (SCARG(uap, whence)) {
2265 (fp->f_offset < 0 ||
2266 (offset > 0 && fp->f_offset > OFF_MAX - offset)))
2268 offset += fp->f_offset;
2271 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2272 error = VOP_GETATTR(vp, &vattr, cred, td);
2273 VOP_UNLOCK(vp, 0, td);
2277 (vattr.va_size > OFF_MAX ||
2278 (offset > 0 && vattr.va_size > OFF_MAX - offset)))
2280 offset += vattr.va_size;
2288 if (noneg && offset < 0)
2290 fp->f_offset = offset;
2291 *(off_t *)(td->td_retval) = fp->f_offset;
2296 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2298 * Reposition read/write file offset.
2300 #ifndef _SYS_SYSPROTO_H_
2301 struct olseek_args {
2310 register struct olseek_args /* {
2312 syscallarg(long) offset;
2313 syscallarg(int) whence;
2316 struct lseek_args /* {
2318 syscallarg(int) pad;
2319 syscallarg(off_t) offset;
2320 syscallarg(int) whence;
2324 SCARG(&nuap, fd) = SCARG(uap, fd);
2325 SCARG(&nuap, offset) = SCARG(uap, offset);
2326 SCARG(&nuap, whence) = SCARG(uap, whence);
2327 error = lseek(td, &nuap);
2330 #endif /* COMPAT_43 || COMPAT_SUNOS */
2333 * Check access permissions using passed credentials.
2336 vn_access(vp, user_flags, cred, td)
2344 /* Flags == 0 means only check for existence. */
2348 if (user_flags & R_OK)
2350 if (user_flags & W_OK)
2352 if (user_flags & X_OK)
2354 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2355 error = VOP_ACCESS(vp, flags, cred, td);
2361 * Check access permissions using "real" credentials.
2363 #ifndef _SYS_SYSPROTO_H_
2364 struct access_args {
2372 register struct access_args /* {
2373 syscallarg(char *) path;
2374 syscallarg(int) flags;
2377 struct ucred *cred, *tmpcred;
2378 register struct vnode *vp;
2380 struct nameidata nd;
2383 * Create and modify a temporary credential instead of one that
2384 * is potentially shared. This could also mess up socket
2385 * buffer accounting which can run in an interrupt context.
2387 * XXX - Depending on how "threads" are finally implemented, it
2388 * may be better to explicitly pass the credential to namei()
2389 * rather than to modify the potentially shared process structure.
2391 cred = td->td_ucred;
2392 tmpcred = crdup(cred);
2393 tmpcred->cr_uid = cred->cr_ruid;
2394 tmpcred->cr_groups[0] = cred->cr_rgid;
2395 td->td_ucred = tmpcred;
2396 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2397 SCARG(uap, path), td);
2398 if ((error = namei(&nd)) != 0)
2402 error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
2403 NDFREE(&nd, NDF_ONLY_PNBUF);
2406 td->td_ucred = cred;
2412 * Check access permissions using "effective" credentials.
2414 #ifndef _SYS_SYSPROTO_H_
2415 struct eaccess_args {
2423 register struct eaccess_args /* {
2424 syscallarg(char *) path;
2425 syscallarg(int) flags;
2428 struct nameidata nd;
2432 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2433 SCARG(uap, path), td);
2434 if ((error = namei(&nd)) != 0)
2438 error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
2439 NDFREE(&nd, NDF_ONLY_PNBUF);
2444 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2446 * Get file status; this version follows links.
2448 #ifndef _SYS_SYSPROTO_H_
2458 register struct ostat_args /* {
2459 syscallarg(char *) path;
2460 syscallarg(struct ostat *) ub;
2466 struct nameidata nd;
2468 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2469 SCARG(uap, path), td);
2470 if ((error = namei(&nd)) != 0)
2472 NDFREE(&nd, NDF_ONLY_PNBUF);
2473 error = vn_stat(nd.ni_vp, &sb, td);
2478 error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2483 * Get file status; this version does not follow links.
2485 #ifndef _SYS_SYSPROTO_H_
2486 struct olstat_args {
2495 register struct olstat_args /* {
2496 syscallarg(char *) path;
2497 syscallarg(struct ostat *) ub;
2504 struct nameidata nd;
2506 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2507 SCARG(uap, path), td);
2508 if ((error = namei(&nd)) != 0)
2511 error = vn_stat(vp, &sb, td);
2512 NDFREE(&nd, NDF_ONLY_PNBUF);
2517 error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2522 * Convert from a new to an old stat structure.
2530 ost->st_dev = st->st_dev;
2531 ost->st_ino = st->st_ino;
2532 ost->st_mode = st->st_mode;
2533 ost->st_nlink = st->st_nlink;
2534 ost->st_uid = st->st_uid;
2535 ost->st_gid = st->st_gid;
2536 ost->st_rdev = st->st_rdev;
2537 if (st->st_size < (quad_t)1 << 32)
2538 ost->st_size = st->st_size;
2541 ost->st_atime = st->st_atime;
2542 ost->st_mtime = st->st_mtime;
2543 ost->st_ctime = st->st_ctime;
2544 ost->st_blksize = st->st_blksize;
2545 ost->st_blocks = st->st_blocks;
2546 ost->st_flags = st->st_flags;
2547 ost->st_gen = st->st_gen;
2549 #endif /* COMPAT_43 || COMPAT_SUNOS */
2552 * Get file status; this version follows links.
2554 #ifndef _SYS_SYSPROTO_H_
2564 register struct stat_args /* {
2565 syscallarg(char *) path;
2566 syscallarg(struct stat *) ub;
2571 struct nameidata nd;
2573 #ifdef LOOKUP_SHARED
2574 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2575 UIO_USERSPACE, SCARG(uap, path), td);
2577 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2578 SCARG(uap, path), td);
2580 if ((error = namei(&nd)) != 0)
2582 error = vn_stat(nd.ni_vp, &sb, td);
2583 NDFREE(&nd, NDF_ONLY_PNBUF);
2587 error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2592 * Get file status; this version does not follow links.
2594 #ifndef _SYS_SYSPROTO_H_
2604 register struct lstat_args /* {
2605 syscallarg(char *) path;
2606 syscallarg(struct stat *) ub;
2612 struct nameidata nd;
2614 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2615 SCARG(uap, path), td);
2616 if ((error = namei(&nd)) != 0)
2619 error = vn_stat(vp, &sb, td);
2620 NDFREE(&nd, NDF_ONLY_PNBUF);
2624 error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2629 * Implementation of the NetBSD stat() function.
2630 * XXX This should probably be collapsed with the FreeBSD version,
2631 * as the differences are only due to vn_stat() clearing spares at
2632 * the end of the structures. vn_stat could be split to avoid this,
2633 * and thus collapse the following to close to zero code.
2640 nsb->st_dev = sb->st_dev;
2641 nsb->st_ino = sb->st_ino;
2642 nsb->st_mode = sb->st_mode;
2643 nsb->st_nlink = sb->st_nlink;
2644 nsb->st_uid = sb->st_uid;
2645 nsb->st_gid = sb->st_gid;
2646 nsb->st_rdev = sb->st_rdev;
2647 nsb->st_atimespec = sb->st_atimespec;
2648 nsb->st_mtimespec = sb->st_mtimespec;
2649 nsb->st_ctimespec = sb->st_ctimespec;
2650 nsb->st_size = sb->st_size;
2651 nsb->st_blocks = sb->st_blocks;
2652 nsb->st_blksize = sb->st_blksize;
2653 nsb->st_flags = sb->st_flags;
2654 nsb->st_gen = sb->st_gen;
2655 nsb->st_qspare[0] = sb->st_qspare[0];
2656 nsb->st_qspare[1] = sb->st_qspare[1];
2659 #ifndef _SYS_SYSPROTO_H_
2669 register struct nstat_args /* {
2670 syscallarg(char *) path;
2671 syscallarg(struct nstat *) ub;
2677 struct nameidata nd;
2679 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2680 SCARG(uap, path), td);
2681 if ((error = namei(&nd)) != 0)
2683 NDFREE(&nd, NDF_ONLY_PNBUF);
2684 error = vn_stat(nd.ni_vp, &sb, td);
2688 cvtnstat(&sb, &nsb);
2689 error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2694 * NetBSD lstat. Get file status; this version does not follow links.
2696 #ifndef _SYS_SYSPROTO_H_
2706 register struct nlstat_args /* {
2707 syscallarg(char *) path;
2708 syscallarg(struct nstat *) ub;
2715 struct nameidata nd;
2717 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2718 SCARG(uap, path), td);
2719 if ((error = namei(&nd)) != 0)
2722 NDFREE(&nd, NDF_ONLY_PNBUF);
2723 error = vn_stat(vp, &sb, td);
2727 cvtnstat(&sb, &nsb);
2728 error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2733 * Get configurable pathname variables.
2735 #ifndef _SYS_SYSPROTO_H_
2736 struct pathconf_args {
2745 register struct pathconf_args /* {
2746 syscallarg(char *) path;
2747 syscallarg(int) name;
2751 struct nameidata nd;
2753 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2754 SCARG(uap, path), td);
2755 if ((error = namei(&nd)) != 0)
2757 NDFREE(&nd, NDF_ONLY_PNBUF);
2758 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2764 * Return target name of a symbolic link.
2766 #ifndef _SYS_SYSPROTO_H_
2767 struct readlink_args {
2777 register struct readlink_args /* {
2778 syscallarg(char *) path;
2779 syscallarg(char *) buf;
2780 syscallarg(int) count;
2783 register struct vnode *vp;
2787 struct nameidata nd;
2789 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2790 SCARG(uap, path), td);
2791 if ((error = namei(&nd)) != 0)
2793 NDFREE(&nd, NDF_ONLY_PNBUF);
2795 if (vp->v_type != VLNK)
2798 aiov.iov_base = SCARG(uap, buf);
2799 aiov.iov_len = SCARG(uap, count);
2800 auio.uio_iov = &aiov;
2801 auio.uio_iovcnt = 1;
2802 auio.uio_offset = 0;
2803 auio.uio_rw = UIO_READ;
2804 auio.uio_segflg = UIO_USERSPACE;
2806 auio.uio_resid = SCARG(uap, count);
2807 error = VOP_READLINK(vp, &auio, td->td_ucred);
2810 td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2815 * Common implementation code for chflags() and fchflags().
2818 setfflags(td, vp, flags)
2828 * Prevent non-root users from setting flags on devices. When
2829 * a device is reused, users can retain ownership of the device
2830 * if they are allowed to set flags and programs assume that
2831 * chown can't fail when done as root.
2833 if (vp->v_type == VCHR || vp->v_type == VBLK) {
2834 error = suser_xxx(td->td_ucred, td->td_proc, PRISON_ROOT);
2839 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2841 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2842 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2844 vattr.va_flags = flags;
2845 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2846 VOP_UNLOCK(vp, 0, td);
2847 vn_finished_write(mp);
2852 * Change flags of a file given a path name.
2854 #ifndef _SYS_SYSPROTO_H_
2855 struct chflags_args {
2864 register struct chflags_args /* {
2865 syscallarg(char *) path;
2866 syscallarg(int) flags;
2870 struct nameidata nd;
2872 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2873 if ((error = namei(&nd)) != 0)
2875 NDFREE(&nd, NDF_ONLY_PNBUF);
2876 error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2882 * Change flags of a file given a file descriptor.
2884 #ifndef _SYS_SYSPROTO_H_
2885 struct fchflags_args {
2894 register struct fchflags_args /* {
2896 syscallarg(int) flags;
2902 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2904 error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2910 * Common implementation code for chmod(), lchmod() and fchmod().
2913 setfmode(td, vp, mode)
2922 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2924 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2925 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2927 vattr.va_mode = mode & ALLPERMS;
2928 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2929 VOP_UNLOCK(vp, 0, td);
2930 vn_finished_write(mp);
2935 * Change mode of a file given path name.
2937 #ifndef _SYS_SYSPROTO_H_
2947 register struct chmod_args /* {
2948 syscallarg(char *) path;
2949 syscallarg(int) mode;
2953 struct nameidata nd;
2955 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2956 if ((error = namei(&nd)) != 0)
2958 NDFREE(&nd, NDF_ONLY_PNBUF);
2959 error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2965 * Change mode of a file given path name (don't follow links.)
2967 #ifndef _SYS_SYSPROTO_H_
2968 struct lchmod_args {
2977 register struct lchmod_args /* {
2978 syscallarg(char *) path;
2979 syscallarg(int) mode;
2983 struct nameidata nd;
2985 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2986 if ((error = namei(&nd)) != 0)
2988 NDFREE(&nd, NDF_ONLY_PNBUF);
2989 error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2995 * Change mode of a file given a file descriptor.
2997 #ifndef _SYS_SYSPROTO_H_
2998 struct fchmod_args {
3007 register struct fchmod_args /* {
3009 syscallarg(int) mode;
3016 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3018 vp = (struct vnode *)fp->f_data;
3019 error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
3025 * Common implementation for chown(), lchown(), and fchown()
3028 setfown(td, vp, uid, gid)
3038 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3040 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3045 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3046 VOP_UNLOCK(vp, 0, td);
3047 vn_finished_write(mp);
3052 * Set ownership given a path name.
3054 #ifndef _SYS_SYSPROTO_H_
3065 register struct chown_args /* {
3066 syscallarg(char *) path;
3067 syscallarg(int) uid;
3068 syscallarg(int) gid;
3072 struct nameidata nd;
3074 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3075 if ((error = namei(&nd)) != 0)
3077 NDFREE(&nd, NDF_ONLY_PNBUF);
3078 error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3084 * Set ownership given a path name, do not cross symlinks.
3086 #ifndef _SYS_SYSPROTO_H_
3087 struct lchown_args {
3097 register struct lchown_args /* {
3098 syscallarg(char *) path;
3099 syscallarg(int) uid;
3100 syscallarg(int) gid;
3104 struct nameidata nd;
3106 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3107 if ((error = namei(&nd)) != 0)
3109 NDFREE(&nd, NDF_ONLY_PNBUF);
3110 error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3116 * Set ownership given a file descriptor.
3118 #ifndef _SYS_SYSPROTO_H_
3119 struct fchown_args {
3129 register struct fchown_args /* {
3131 syscallarg(int) uid;
3132 syscallarg(int) gid;
3139 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3141 vp = (struct vnode *)fp->f_data;
3142 error = setfown(td, (struct vnode *)fp->f_data,
3143 SCARG(uap, uid), SCARG(uap, gid));
3149 * Common implementation code for utimes(), lutimes(), and futimes().
3152 getutimes(usrtvp, tsp)
3153 const struct timeval *usrtvp;
3154 struct timespec *tsp;
3156 struct timeval tv[2];
3159 if (usrtvp == NULL) {
3161 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3164 if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
3166 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3167 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
3173 * Common implementation code for utimes(), lutimes(), and futimes().
3176 setutimes(td, vp, ts, nullflag)
3179 const struct timespec *ts;
3186 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3188 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3189 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3191 vattr.va_atime = ts[0];
3192 vattr.va_mtime = ts[1];
3194 vattr.va_vaflags |= VA_UTIMES_NULL;
3195 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3196 VOP_UNLOCK(vp, 0, td);
3197 vn_finished_write(mp);
3202 * Set the access and modification times of a file.
3204 #ifndef _SYS_SYSPROTO_H_
3205 struct utimes_args {
3207 struct timeval *tptr;
3214 register struct utimes_args /* {
3215 syscallarg(char *) path;
3216 syscallarg(struct timeval *) tptr;
3219 struct timespec ts[2];
3220 struct timeval *usrtvp;
3222 struct nameidata nd;
3224 usrtvp = SCARG(uap, tptr);
3225 if ((error = getutimes(usrtvp, ts)) != 0)
3227 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3228 if ((error = namei(&nd)) != 0)
3230 NDFREE(&nd, NDF_ONLY_PNBUF);
3231 error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3237 * Set the access and modification times of a file; this version does not follow links.
3239 #ifndef _SYS_SYSPROTO_H_
3240 struct lutimes_args {
3242 struct timeval *tptr;
3249 register struct lutimes_args /* {
3250 syscallarg(char *) path;
3251 syscallarg(struct timeval *) tptr;
3254 struct timespec ts[2];
3255 struct timeval *usrtvp;
3257 struct nameidata nd;
3259 usrtvp = SCARG(uap, tptr);
3260 if ((error = getutimes(usrtvp, ts)) != 0)
3262 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3263 if ((error = namei(&nd)) != 0)
3265 NDFREE(&nd, NDF_ONLY_PNBUF);
3266 error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3272 * Set the access and modification times of a file given a file descriptor.
3274 #ifndef _SYS_SYSPROTO_H_
3275 struct futimes_args {
3277 struct timeval *tptr;
3284 register struct futimes_args /* {
3285 syscallarg(int ) fd;
3286 syscallarg(struct timeval *) tptr;
3289 struct timespec ts[2];
3291 struct timeval *usrtvp;
3294 usrtvp = SCARG(uap, tptr);
3295 if ((error = getutimes(usrtvp, ts)) != 0)
3297 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3299 error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
3305 * Truncate a file given its path name.
3307 #ifndef _SYS_SYSPROTO_H_
3308 struct truncate_args {
3318 register struct truncate_args /* {
3319 syscallarg(char *) path;
3320 syscallarg(int) pad;
3321 syscallarg(off_t) length;
3328 struct nameidata nd;
3330 if (uap->length < 0)
3332 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3333 if ((error = namei(&nd)) != 0)
3336 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3340 NDFREE(&nd, NDF_ONLY_PNBUF);
3341 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3342 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3343 if (vp->v_type == VDIR)
3345 else if ((error = vn_writechk(vp)) == 0 &&
3346 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3348 vattr.va_size = SCARG(uap, length);
3349 error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3352 vn_finished_write(mp);
3357 * Truncate a file given a file descriptor.
3359 #ifndef _SYS_SYSPROTO_H_
3360 struct ftruncate_args {
3370 register struct ftruncate_args /* {
3372 syscallarg(int) pad;
3373 syscallarg(off_t) length;
3382 if (uap->length < 0)
3384 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3386 if ((fp->f_flag & FWRITE) == 0) {
3390 vp = (struct vnode *)fp->f_data;
3391 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3395 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3396 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3397 if (vp->v_type == VDIR)
3399 else if ((error = vn_writechk(vp)) == 0) {
3401 vattr.va_size = SCARG(uap, length);
3402 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3404 VOP_UNLOCK(vp, 0, td);
3405 vn_finished_write(mp);
3410 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3412 * Truncate a file given its path name.
3414 #ifndef _SYS_SYSPROTO_H_
3415 struct otruncate_args {
3424 register struct otruncate_args /* {
3425 syscallarg(char *) path;
3426 syscallarg(long) length;
3429 struct truncate_args /* {
3430 syscallarg(char *) path;
3431 syscallarg(int) pad;
3432 syscallarg(off_t) length;
3435 SCARG(&nuap, path) = SCARG(uap, path);
3436 SCARG(&nuap, length) = SCARG(uap, length);
3437 return (truncate(td, &nuap));
3441 * Truncate a file given a file descriptor.
3443 #ifndef _SYS_SYSPROTO_H_
3444 struct oftruncate_args {
3453 register struct oftruncate_args /* {
3455 syscallarg(long) length;
3458 struct ftruncate_args /* {
3460 syscallarg(int) pad;
3461 syscallarg(off_t) length;
3464 SCARG(&nuap, fd) = SCARG(uap, fd);
3465 SCARG(&nuap, length) = SCARG(uap, length);
3466 return (ftruncate(td, &nuap));
3468 #endif /* COMPAT_43 || COMPAT_SUNOS */
3471 * Sync an open file.
3473 #ifndef _SYS_SYSPROTO_H_
3482 struct fsync_args /* {
3494 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3496 vp = (struct vnode *)fp->f_data;
3497 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3501 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3502 if (VOP_GETVOBJECT(vp, &obj) == 0) {
3503 vm_object_page_clean(obj, 0, 0, 0);
3505 error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3507 if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3508 error = softdep_fsync(vp);
3511 VOP_UNLOCK(vp, 0, td);
3512 vn_finished_write(mp);
3518 * Rename files. Source and destination must either both be directories,
3519 * or both not be directories. If target is a directory, it must be empty.
3521 #ifndef _SYS_SYSPROTO_H_
3522 struct rename_args {
3531 register struct rename_args /* {
3532 syscallarg(char *) from;
3533 syscallarg(char *) to;
3537 struct vnode *tvp, *fvp, *tdvp;
3538 struct nameidata fromnd, tond;
3542 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3543 SCARG(uap, from), td);
3544 if ((error = namei(&fromnd)) != 0)
3547 if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3548 NDFREE(&fromnd, NDF_ONLY_PNBUF);
3549 vrele(fromnd.ni_dvp);
3553 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3554 UIO_USERSPACE, SCARG(uap, to), td);
3555 if (fromnd.ni_vp->v_type == VDIR)
3556 tond.ni_cnd.cn_flags |= WILLBEDIR;
3557 if ((error = namei(&tond)) != 0) {
3558 /* Translate error code for rename("dir1", "dir2/."). */
3559 if (error == EISDIR && fvp->v_type == VDIR)
3561 NDFREE(&fromnd, NDF_ONLY_PNBUF);
3562 vrele(fromnd.ni_dvp);
3569 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3572 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3580 * If source is the same as the destination (that is the
3581 * same inode number with the same name in the same directory),
3582 * then there is nothing to do.
3584 if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3585 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3586 !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3587 fromnd.ni_cnd.cn_namelen))
3591 VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3592 if (fromnd.ni_dvp != tdvp) {
3593 VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3596 VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3598 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3599 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3600 NDFREE(&fromnd, NDF_ONLY_PNBUF);
3601 NDFREE(&tond, NDF_ONLY_PNBUF);
3603 NDFREE(&fromnd, NDF_ONLY_PNBUF);
3604 NDFREE(&tond, NDF_ONLY_PNBUF);
3611 vrele(fromnd.ni_dvp);
3614 vrele(tond.ni_startdir);
3615 vn_finished_write(mp);
3616 ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3617 ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3618 ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3619 ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3621 if (fromnd.ni_startdir)
3622 vrele(fromnd.ni_startdir);
3629 * Make a directory file.
3631 #ifndef _SYS_SYSPROTO_H_
3641 register struct mkdir_args /* {
3642 syscallarg(char *) path;
3643 syscallarg(int) mode;
3647 return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3651 vn_mkdir(path, mode, segflg, td)
3654 enum uio_seg segflg;
3661 struct nameidata nd;
3665 NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3666 nd.ni_cnd.cn_flags |= WILLBEDIR;
3667 if ((error = namei(&nd)) != 0)
3671 NDFREE(&nd, NDF_ONLY_PNBUF);
3676 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3677 NDFREE(&nd, NDF_ONLY_PNBUF);
3679 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3684 vattr.va_type = VDIR;
3685 FILEDESC_LOCK(td->td_proc->p_fd);
3686 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3687 FILEDESC_UNLOCK(td->td_proc->p_fd);
3688 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3689 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3690 NDFREE(&nd, NDF_ONLY_PNBUF);
3694 vn_finished_write(mp);
3695 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3696 ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3701 * Remove a directory file.
3703 #ifndef _SYS_SYSPROTO_H_
3712 struct rmdir_args /* {
3713 syscallarg(char *) path;
3719 struct nameidata nd;
3723 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3724 SCARG(uap, path), td);
3725 if ((error = namei(&nd)) != 0)
3728 if (vp->v_type != VDIR) {
3733 * No rmdir "." please.
3735 if (nd.ni_dvp == vp) {
3740 * The root of a mounted filesystem cannot be deleted.
3742 if (vp->v_flag & VROOT) {
3746 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3747 NDFREE(&nd, NDF_ONLY_PNBUF);
3748 if (nd.ni_dvp == vp)
3753 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3757 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3758 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3759 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3760 vn_finished_write(mp);
3762 NDFREE(&nd, NDF_ONLY_PNBUF);
3763 if (nd.ni_dvp == vp)
3768 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3769 ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3775 * Read a block of directory entries in a file system independent format.
3777 #ifndef _SYS_SYSPROTO_H_
3778 struct ogetdirentries_args {
3786 ogetdirentries(td, uap)
3788 register struct ogetdirentries_args /* {
3790 syscallarg(char *) buf;
3791 syscallarg(u_int) count;
3792 syscallarg(long *) basep;
3797 struct uio auio, kuio;
3798 struct iovec aiov, kiov;
3799 struct dirent *dp, *edp;
3801 int error, eofflag, readcnt;
3804 /* XXX arbitrary sanity limit on `count'. */
3805 if (SCARG(uap, count) > 64 * 1024)
3807 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3809 if ((fp->f_flag & FREAD) == 0) {
3813 vp = (struct vnode *)fp->f_data;
3815 if (vp->v_type != VDIR) {
3819 aiov.iov_base = SCARG(uap, buf);
3820 aiov.iov_len = SCARG(uap, count);
3821 auio.uio_iov = &aiov;
3822 auio.uio_iovcnt = 1;
3823 auio.uio_rw = UIO_READ;
3824 auio.uio_segflg = UIO_USERSPACE;
3826 auio.uio_resid = SCARG(uap, count);
3827 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3828 loff = auio.uio_offset = fp->f_offset;
3829 # if (BYTE_ORDER != LITTLE_ENDIAN)
3830 if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3831 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3833 fp->f_offset = auio.uio_offset;
3838 kuio.uio_iov = &kiov;
3839 kuio.uio_segflg = UIO_SYSSPACE;
3840 kiov.iov_len = SCARG(uap, count);
3841 MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3842 kiov.iov_base = dirbuf;
3843 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3845 fp->f_offset = kuio.uio_offset;
3847 readcnt = SCARG(uap, count) - kuio.uio_resid;
3848 edp = (struct dirent *)&dirbuf[readcnt];
3849 for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3850 # if (BYTE_ORDER == LITTLE_ENDIAN)
3852 * The expected low byte of
3853 * dp->d_namlen is our dp->d_type.
3854 * The high MBZ byte of dp->d_namlen
3855 * is our dp->d_namlen.
3857 dp->d_type = dp->d_namlen;
3861 * The dp->d_type is the high byte
3862 * of the expected dp->d_namlen,
3863 * so must be zero'ed.
3867 if (dp->d_reclen > 0) {
3868 dp = (struct dirent *)
3869 ((char *)dp + dp->d_reclen);
3876 error = uiomove(dirbuf, readcnt, &auio);
3878 FREE(dirbuf, M_TEMP);
3880 VOP_UNLOCK(vp, 0, td);
3885 if (SCARG(uap, count) == auio.uio_resid) {
3886 if (union_dircheckp) {
3887 error = union_dircheckp(td, &vp, fp);
3895 if ((vp->v_flag & VROOT) &&
3896 (vp->v_mount->mnt_flag & MNT_UNION)) {
3897 struct vnode *tvp = vp;
3898 vp = vp->v_mount->mnt_vnodecovered;
3900 fp->f_data = (caddr_t) vp;
3906 error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3909 td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3912 #endif /* COMPAT_43 */
3915 * Read a block of directory entries in a file system independent format.
3917 #ifndef _SYS_SYSPROTO_H_
3918 struct getdirentries_args {
3926 getdirentries(td, uap)
3928 register struct getdirentries_args /* {
3930 syscallarg(char *) buf;
3931 syscallarg(u_int) count;
3932 syscallarg(long *) basep;
3942 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3944 if ((fp->f_flag & FREAD) == 0) {
3948 vp = (struct vnode *)fp->f_data;
3950 if (vp->v_type != VDIR) {
3954 aiov.iov_base = SCARG(uap, buf);
3955 aiov.iov_len = SCARG(uap, count);
3956 auio.uio_iov = &aiov;
3957 auio.uio_iovcnt = 1;
3958 auio.uio_rw = UIO_READ;
3959 auio.uio_segflg = UIO_USERSPACE;
3961 auio.uio_resid = SCARG(uap, count);
3962 /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3963 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3964 loff = auio.uio_offset = fp->f_offset;
3965 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3966 fp->f_offset = auio.uio_offset;
3967 VOP_UNLOCK(vp, 0, td);
3972 if (SCARG(uap, count) == auio.uio_resid) {
3973 if (union_dircheckp) {
3974 error = union_dircheckp(td, &vp, fp);
3982 if ((vp->v_flag & VROOT) &&
3983 (vp->v_mount->mnt_flag & MNT_UNION)) {
3984 struct vnode *tvp = vp;
3985 vp = vp->v_mount->mnt_vnodecovered;
3987 fp->f_data = (caddr_t) vp;
3993 if (SCARG(uap, basep) != NULL) {
3994 error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3997 td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
4001 #ifndef _SYS_SYSPROTO_H_
4002 struct getdents_args {
4011 register struct getdents_args /* {
4013 syscallarg(char *) buf;
4014 syscallarg(u_int) count;
4017 struct getdirentries_args ap;
4020 ap.count = uap->count;
4022 return getdirentries(td, &ap);
4026 * Set the mode mask for creation of filesystem nodes.
4030 #ifndef _SYS_SYSPROTO_H_
4038 struct umask_args /* {
4039 syscallarg(int) newmask;
4042 register struct filedesc *fdp;
4044 FILEDESC_LOCK(td->td_proc->p_fd);
4045 fdp = td->td_proc->p_fd;
4046 td->td_retval[0] = fdp->fd_cmask;
4047 fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
4048 FILEDESC_UNLOCK(td->td_proc->p_fd);
4053 * Void all references to file by ripping underlying filesystem
4056 #ifndef _SYS_SYSPROTO_H_
4057 struct revoke_args {
4065 register struct revoke_args /* {
4066 syscallarg(char *) path;
4073 struct nameidata nd;
4075 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
4077 if ((error = namei(&nd)) != 0)
4080 NDFREE(&nd, NDF_ONLY_PNBUF);
4081 if (vp->v_type != VCHR) {
4085 error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
4090 VOP_UNLOCK(vp, 0, td);
4091 if (td->td_ucred->cr_uid != vattr.va_uid) {
4092 error = suser_xxx(0, td->td_proc, PRISON_ROOT);
4096 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4099 VOP_REVOKE(vp, REVOKEALL);
4100 vn_finished_write(mp);
4107 * Convert a user file descriptor to a kernel file entry.
4108 * The file entry is locked upon returning.
4111 getvnode(fdp, fd, fpp)
4112 struct filedesc *fdp;
4124 if ((u_int)fd >= fdp->fd_nfiles ||
4125 (fp = fdp->fd_ofiles[fd]) == NULL)
4127 else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
4134 FILEDESC_UNLOCK(fdp);
4140 * Get (NFS) file handle
4142 #ifndef _SYS_SYSPROTO_H_
4151 register struct getfh_args *uap;
4153 struct nameidata nd;
4155 register struct vnode *vp;
4159 * Must be super user
4161 error = suser_td(td);
4164 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
4168 NDFREE(&nd, NDF_ONLY_PNBUF);
4170 bzero(&fh, sizeof(fh));
4171 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4172 error = VFS_VPTOFH(vp, &fh.fh_fid);
4176 error = copyout(&fh, uap->fhp, sizeof (fh));
4181 * syscall for the rpc.lockd to use to translate a NFS file handle into
4182 * an open descriptor.
4184 * warning: do not remove the suser() call or this becomes one giant
4187 #ifndef _SYS_SYSPROTO_H_
4188 struct fhopen_args {
4189 const struct fhandle *u_fhp;
4196 struct fhopen_args /* {
4197 syscallarg(const struct fhandle *) u_fhp;
4198 syscallarg(int) flags;
4201 struct proc *p = td->td_proc;
4206 struct vattr *vap = &vat;
4209 register struct filedesc *fdp = p->p_fd;
4210 int fmode, mode, error, type;
4215 * Must be super user
4217 error = suser_td(td);
4221 fmode = FFLAGS(SCARG(uap, flags));
4222 /* why not allow a non-read/write open for our lockd? */
4223 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4225 error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
4228 /* find the mount point */
4229 mp = vfs_getvfs(&fhp.fh_fsid);
4232 /* now give me my vnode, it gets returned to me locked */
4233 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4237 * From now on we have to make sure not
4238 * to forget about the vnode:
4239 * any error that causes an abort must vput(vp);
4240 * just set error = err and 'goto bad;'.
4246 if (vp->v_type == VLNK) {
4250 if (vp->v_type == VSOCK) {
4255 if (fmode & (FWRITE | O_TRUNC)) {
4256 if (vp->v_type == VDIR) {
4260 error = vn_writechk(vp);
4268 error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4272 if (fmode & O_TRUNC) {
4273 VOP_UNLOCK(vp, 0, td); /* XXX */
4274 if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4278 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4279 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */
4282 error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4283 vn_finished_write(mp);
4287 error = VOP_OPEN(vp, fmode, td->td_ucred, td);
4291 * Make sure that a VM object is created for VMIO support.
4293 if (vn_canvmio(vp) == TRUE) {
4294 if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
4301 * end of vn_open code
4304 if ((error = falloc(td, &nfp, &indx)) != 0) {
4312 * Hold an extra reference to avoid having fp ripped out
4313 * from under us while we block in the lock op
4316 nfp->f_data = (caddr_t)vp;
4317 nfp->f_flag = fmode & FMASK;
4318 nfp->f_ops = &vnops;
4319 nfp->f_type = DTYPE_VNODE;
4320 if (fmode & (O_EXLOCK | O_SHLOCK)) {
4321 lf.l_whence = SEEK_SET;
4324 if (fmode & O_EXLOCK)
4325 lf.l_type = F_WRLCK;
4327 lf.l_type = F_RDLCK;
4329 if ((fmode & FNONBLOCK) == 0)
4331 VOP_UNLOCK(vp, 0, td);
4332 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
4334 * The lock request failed. Normally close the
4335 * descriptor but handle the case where someone might
4336 * have dup()d or close()d it when we weren't looking.
4339 if (fdp->fd_ofiles[indx] == fp) {
4340 fdp->fd_ofiles[indx] = NULL;
4341 FILEDESC_UNLOCK(fdp);
4344 FILEDESC_UNLOCK(fdp);
4346 * release our private reference
4351 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4352 fp->f_flag |= FHASLOCK;
4354 if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
4355 vfs_object_create(vp, td, td->td_ucred);
4357 VOP_UNLOCK(vp, 0, td);
4359 td->td_retval[0] = indx;
4368 * Stat an (NFS) file handle.
4370 #ifndef _SYS_SYSPROTO_H_
4371 struct fhstat_args {
4372 struct fhandle *u_fhp;
4379 register struct fhstat_args /* {
4380 syscallarg(struct fhandle *) u_fhp;
4381 syscallarg(struct stat *) sb;
4391 * Must be super user
4393 error = suser_td(td);
4397 error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
4401 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4403 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4405 error = vn_stat(vp, &sb, td);
4409 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
4414 * Implement fstatfs() for (NFS) file handles.
4416 #ifndef _SYS_SYSPROTO_H_
4417 struct fhstatfs_args {
4418 struct fhandle *u_fhp;
4425 struct fhstatfs_args /* {
4426 syscallarg(struct fhandle *) u_fhp;
4427 syscallarg(struct statfs *) buf;
4438 * Must be super user
4440 error = suser_td(td);
4444 if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
4447 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4449 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4454 if ((error = VFS_STATFS(mp, sp, td)) != 0)
4456 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4457 if (suser_xxx(td->td_ucred, 0, 0)) {
4458 bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
4459 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4462 return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
4466 * Syscall to push extended attribute configuration information into the
4467 * VFS. Accepts a path, which it converts to a mountpoint, as well as
4468 * a command (int cmd), and attribute name and misc data. For now, the
4469 * attribute name is left in userspace for consumption by the VFS_op.
4470 * It will probably be changed to be copied into sysspace by the
4471 * syscall in the future, once issues with various consumers of the
4472 * attribute code have raised their hands.
4474 * Currently this is used only by UFS Extended Attributes.
4479 struct extattrctl_args *uap;
4481 struct vnode *filename_vp;
4482 struct nameidata nd;
4483 struct mount *mp, *mp_writable;
4484 char attrname[EXTATTR_MAXNAMELEN];
4488 * SCARG(uap, attrname) not always defined. We check again later
4489 * when we invoke the VFS call so as to pass in NULL there if needed.
4491 if (SCARG(uap, attrname) != NULL) {
4492 error = copyinstr(SCARG(uap, attrname), attrname,
4493 EXTATTR_MAXNAMELEN, NULL);
4499 * SCARG(uap, filename) not always defined. If it is, grab
4500 * a vnode lock, which VFS_EXTATTRCTL() will later release.
4503 if (SCARG(uap, filename) != NULL) {
4504 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4505 SCARG(uap, filename), td);
4506 if ((error = namei(&nd)) != 0)
4508 filename_vp = nd.ni_vp;
4509 NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4512 /* SCARG(uap, path) always defined. */
4513 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4514 if ((error = namei(&nd)) != 0) {
4515 if (filename_vp != NULL)
4519 mp = nd.ni_vp->v_mount;
4520 error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4523 if (filename_vp != NULL)
4528 if (SCARG(uap, attrname) != NULL) {
4529 error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
4530 SCARG(uap, attrnamespace), attrname, td);
4532 error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
4533 SCARG(uap, attrnamespace), NULL, td);
4536 vn_finished_write(mp_writable);
4538 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4539 * filename_vp, so vrele it if it is defined.
4541 if (filename_vp != NULL)
4548 * Set a named extended attribute on a file or directory
4550 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4551 * kernelspace string pointer "attrname", userspace buffer
4552 * pointer "data", buffer length "nbytes", thread "td".
4553 * Returns: 0 on success, an error number otherwise
4555 * References: vp must be a valid reference for the duration of the call
4558 extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4559 void *data, size_t nbytes, struct thread *td)
4567 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4569 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4570 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4572 aiov.iov_base = data;
4573 aiov.iov_len = nbytes;
4574 auio.uio_iov = &aiov;
4575 auio.uio_iovcnt = 1;
4576 auio.uio_offset = 0;
4577 if (nbytes > INT_MAX) {
4581 auio.uio_resid = nbytes;
4582 auio.uio_rw = UIO_WRITE;
4583 auio.uio_segflg = UIO_USERSPACE;
4587 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4589 cnt -= auio.uio_resid;
4590 td->td_retval[0] = cnt;
4593 VOP_UNLOCK(vp, 0, td);
4594 vn_finished_write(mp);
4599 extattr_set_file(td, uap)
4601 struct extattr_set_file_args *uap;
4603 struct nameidata nd;
4604 char attrname[EXTATTR_MAXNAMELEN];
4607 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4612 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4613 if ((error = namei(&nd)) != 0)
4615 NDFREE(&nd, NDF_ONLY_PNBUF);
4617 error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4618 SCARG(uap, data), SCARG(uap, nbytes), td);
4625 extattr_set_fd(td, uap)
4627 struct extattr_set_fd_args *uap;
4630 char attrname[EXTATTR_MAXNAMELEN];
4633 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4638 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4641 error = extattr_set_vp((struct vnode *)fp->f_data,
4642 SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4643 SCARG(uap, nbytes), td);
4650 * Get a named extended attribute on a file or directory
4652 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4653 * kernelspace string pointer "attrname", userspace buffer
4654 * pointer "data", buffer length "nbytes", thread "td".
4655 * Returns: 0 on success, an error number otherwise
4657 * References: vp must be a valid reference for the duration of the call
4660 extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4661 void *data, size_t nbytes, struct thread *td)
4669 VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4670 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4673 * Slightly unusual semantics: if the user provides a NULL data
4674 * pointer, they don't want to receive the data, just the
4675 * maximum read length.
4678 aiov.iov_base = data;
4679 aiov.iov_len = nbytes;
4680 auio.uio_iov = &aiov;
4681 auio.uio_offset = 0;
4682 if (nbytes > INT_MAX) {
4686 auio.uio_resid = nbytes;
4687 auio.uio_rw = UIO_READ;
4688 auio.uio_segflg = UIO_USERSPACE;
4691 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4692 NULL, td->td_ucred, td);
4693 cnt -= auio.uio_resid;
4694 td->td_retval[0] = cnt;
4696 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
4697 &size, td->td_ucred, td);
4698 td->td_retval[0] = size;
4701 VOP_UNLOCK(vp, 0, td);
4706 extattr_get_file(td, uap)
4708 struct extattr_get_file_args *uap;
4710 struct nameidata nd;
4711 char attrname[EXTATTR_MAXNAMELEN];
4714 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4719 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4720 if ((error = namei(&nd)) != 0)
4722 NDFREE(&nd, NDF_ONLY_PNBUF);
4724 error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4725 SCARG(uap, data), SCARG(uap, nbytes), td);
4732 extattr_get_fd(td, uap)
4734 struct extattr_get_fd_args *uap;
4737 char attrname[EXTATTR_MAXNAMELEN];
4740 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4745 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4748 error = extattr_get_vp((struct vnode *)fp->f_data,
4749 SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4750 SCARG(uap, nbytes), td);
4757 * extattr_delete_vp(): Delete a named extended attribute on a file or
4760 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4761 * kernelspace string pointer "attrname", thread "td"
4762 * Returns: 0 on success, an error number otherwise
4764 * References: vp must be a valid reference for the duration of the call
4767 extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4773 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4775 VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4776 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4778 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4781 VOP_UNLOCK(vp, 0, td);
4782 vn_finished_write(mp);
4787 extattr_delete_file(td, uap)
4789 struct extattr_delete_file_args *uap;
4791 struct nameidata nd;
4792 char attrname[EXTATTR_MAXNAMELEN];
4795 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4800 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4801 if ((error = namei(&nd)) != 0)
4803 NDFREE(&nd, NDF_ONLY_PNBUF);
4805 error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4813 extattr_delete_fd(td, uap)
4815 struct extattr_delete_fd_args *uap;
4819 char attrname[EXTATTR_MAXNAMELEN];
4822 error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4827 if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4829 vp = (struct vnode *)fp->f_data;
4831 error = extattr_delete_vp((struct vnode *)fp->f_data,
4832 SCARG(uap, attrnamespace), attrname, td);