2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
42 #include "opt_compat.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
58 #include <sys/filio.h>
59 #include <sys/fcntl.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
64 #include <sys/socketvar.h>
66 #include <machine/limits.h>
69 #include <vm/vm_extern.h>
71 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
72 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
73 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
75 static d_open_t fdopen;
79 static struct cdevsw fildesc_cdevsw = {
87 /* strategy */ nostrategy,
95 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
96 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
97 struct ucred *cred, int flags, struct thread *td));
98 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
100 static int badfo_poll __P((struct file *fp, int events,
101 struct ucred *cred, struct thread *td));
102 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
103 static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
104 static int badfo_close __P((struct file *fp, struct thread *td));
107 * Descriptor management.
109 struct filelist filehead; /* head of list of open files */
110 int nfiles; /* actual number of open files */
112 struct sx filelist_lock; /* sx to protect filelist */
115 * System calls on descriptors.
117 #ifndef _SYS_SYSPROTO_H_
118 struct getdtablesize_args {
127 getdtablesize(td, uap)
129 struct getdtablesize_args *uap;
131 struct proc *p = td->td_proc;
135 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
141 * Duplicate a file descriptor to a particular value.
143 * note: keep in mind that a potential race condition exists when closing
144 * descriptors from a shared descriptor table (via rfork).
146 #ifndef _SYS_SYSPROTO_H_
159 struct dup2_args *uap;
161 struct proc *p = td->td_proc;
162 register struct filedesc *fdp = td->td_proc->p_fd;
163 register u_int old = uap->from, new = uap->to;
168 if (old >= fdp->fd_nfiles ||
169 fdp->fd_ofiles[old] == NULL ||
170 new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
171 new >= maxfilesperproc) {
172 FILEDESC_UNLOCK(fdp);
176 td->td_retval[0] = new;
177 FILEDESC_UNLOCK(fdp);
180 if (new >= fdp->fd_nfiles) {
181 if ((error = fdalloc(td, new, &i))) {
182 FILEDESC_UNLOCK(fdp);
186 * fdalloc() may block, retest everything.
190 error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
195 * Duplicate a file descriptor.
197 #ifndef _SYS_SYSPROTO_H_
209 struct dup_args *uap;
211 register struct filedesc *fdp;
216 fdp = td->td_proc->p_fd;
218 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
219 FILEDESC_UNLOCK(fdp);
222 if ((error = fdalloc(td, 0, &new))) {
223 FILEDESC_UNLOCK(fdp);
226 error = do_dup(fdp, (int)old, new, td->td_retval, td);
231 * The file control system call.
233 #ifndef _SYS_SYSPROTO_H_
247 register struct fcntl_args *uap;
249 register struct proc *p = td->td_proc;
250 register struct filedesc *fdp;
251 register struct file *fp;
254 int i, tmp, error = 0, flg = F_POSIX;
257 struct proc *leaderp;
263 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
264 (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
265 FILEDESC_UNLOCK(fdp);
269 pop = &fdp->fd_ofileflags[uap->fd];
274 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
275 newmin >= maxfilesperproc) {
276 FILEDESC_UNLOCK(fdp);
280 if ((error = fdalloc(td, newmin, &i))) {
281 FILEDESC_UNLOCK(fdp);
284 error = do_dup(fdp, uap->fd, i, td->td_retval, td);
288 td->td_retval[0] = *pop & 1;
289 FILEDESC_UNLOCK(fdp);
293 *pop = (*pop &~ 1) | (uap->arg & 1);
294 FILEDESC_UNLOCK(fdp);
299 FILEDESC_UNLOCK(fdp);
300 td->td_retval[0] = OFLAGS(fp->f_flag);
306 FILEDESC_UNLOCK(fdp);
307 fp->f_flag &= ~FCNTLFLAGS;
308 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
309 tmp = fp->f_flag & FNONBLOCK;
310 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
315 tmp = fp->f_flag & FASYNC;
316 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
321 fp->f_flag &= ~FNONBLOCK;
323 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
329 FILEDESC_UNLOCK(fdp);
330 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
336 FILEDESC_UNLOCK(fdp);
337 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
343 /* Fall into F_SETLK */
346 if (fp->f_type != DTYPE_VNODE) {
347 FILEDESC_UNLOCK(fdp);
351 vp = (struct vnode *)fp->f_data;
353 * copyin/lockop may block
356 FILEDESC_UNLOCK(fdp);
357 vp = (struct vnode *)fp->f_data;
359 /* Copy in the lock structure */
360 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
366 if (fl.l_whence == SEEK_CUR) {
367 if (fp->f_offset < 0 ||
369 fp->f_offset > OFF_MAX - fl.l_start)) {
374 fl.l_start += fp->f_offset;
379 if ((fp->f_flag & FREAD) == 0) {
384 p->p_flag |= P_ADVLOCK;
385 leaderp = p->p_leader;
387 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
391 if ((fp->f_flag & FWRITE) == 0) {
396 p->p_flag |= P_ADVLOCK;
397 leaderp = p->p_leader;
399 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
404 leaderp = p->p_leader;
406 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
417 if (fp->f_type != DTYPE_VNODE) {
418 FILEDESC_UNLOCK(fdp);
422 vp = (struct vnode *)fp->f_data;
424 * copyin/lockop may block
427 FILEDESC_UNLOCK(fdp);
428 vp = (struct vnode *)fp->f_data;
430 /* Copy in the lock structure */
431 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
437 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
438 fl.l_type != F_UNLCK) {
443 if (fl.l_whence == SEEK_CUR) {
444 if ((fl.l_start > 0 &&
445 fp->f_offset > OFF_MAX - fl.l_start) ||
447 fp->f_offset < OFF_MIN - fl.l_start)) {
452 fl.l_start += fp->f_offset;
454 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
458 error = copyout((caddr_t)&fl,
459 (caddr_t)(intptr_t)uap->arg, sizeof(fl));
463 FILEDESC_UNLOCK(fdp);
473 * Common code for dup, dup2, and fcntl(F_DUPFD).
474 * filedesc must be locked, but will be unlocked as a side effect.
477 do_dup(fdp, old, new, retval, td)
478 register struct filedesc *fdp;
479 register int old, new;
486 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
489 * Save info on the descriptor being overwritten. We have
490 * to do the unmap now, but we cannot close it without
491 * introducing an ownership race for the slot.
493 delfp = fdp->fd_ofiles[new];
495 if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
496 (void) munmapfd(td, new);
500 * Duplicate the source descriptor, update lastfile
502 fp = fdp->fd_ofiles[old];
503 fdp->fd_ofiles[new] = fp;
504 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
506 if (new > fdp->fd_lastfile)
507 fdp->fd_lastfile = new;
510 FILEDESC_UNLOCK(fdp);
513 * If we dup'd over a valid file, we now own the reference to it
514 * and must dispose of it using closef() semantics (as if a
515 * close() were performed on it).
519 (void) closef(delfp, td);
526 * If sigio is on the list associated with a process or process group,
527 * disable signalling from the device, remove sigio from the list and
539 *(sigio->sio_myref) = NULL;
541 if (sigio->sio_pgid < 0) {
542 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
544 } else /* if ((*sigiop)->sio_pgid > 0) */ {
545 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
548 crfree(sigio->sio_ucred);
549 FREE(sigio, M_SIGIO);
552 /* Free a list of sigio structures. */
554 funsetownlst(sigiolst)
555 struct sigiolst *sigiolst;
559 while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
564 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
566 * After permission checking, add a sigio structure to the sigio list for
567 * the process or process group.
570 fsetown(pgid, sigiop)
572 struct sigio **sigiop;
589 * Policy - Don't allow a process to FSETOWN a process
590 * in another session.
592 * Remove this test to allow maximum flexibility or
593 * restrict FSETOWN to the current process or process
594 * group for maximum safety.
596 if (proc->p_session != curthread->td_proc->p_session) {
603 } else /* if (pgid < 0) */ {
604 pgrp = pgfind(-pgid);
609 * Policy - Don't allow a process to FSETOWN a process
610 * in another session.
612 * Remove this test to allow maximum flexibility or
613 * restrict FSETOWN to the current process or process
614 * group for maximum safety.
616 if (pgrp->pg_session != curthread->td_proc->p_session)
622 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
624 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
625 sigio->sio_proc = proc;
627 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
628 sigio->sio_pgrp = pgrp;
630 sigio->sio_pgid = pgid;
631 sigio->sio_ucred = crhold(curthread->td_proc->p_ucred);
632 sigio->sio_myref = sigiop;
640 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
646 return (sigio != NULL ? sigio->sio_pgid : 0);
650 * Close a file descriptor.
652 #ifndef _SYS_SYSPROTO_H_
664 struct close_args *uap;
666 register struct filedesc *fdp;
667 register struct file *fp;
668 register int fd = uap->fd;
672 fdp = td->td_proc->p_fd;
674 if ((unsigned)fd >= fdp->fd_nfiles ||
675 (fp = fdp->fd_ofiles[fd]) == NULL) {
676 FILEDESC_UNLOCK(fdp);
681 if (fdp->fd_ofileflags[fd] & UF_MAPPED)
682 (void) munmapfd(td, fd);
684 fdp->fd_ofiles[fd] = NULL;
685 fdp->fd_ofileflags[fd] = 0;
688 * we now hold the fp reference that used to be owned by the descriptor
691 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
693 if (fd < fdp->fd_freefile)
694 fdp->fd_freefile = fd;
695 if (fd < fdp->fd_knlistsize) {
696 FILEDESC_UNLOCK(fdp);
697 knote_fdclose(td, fd);
699 FILEDESC_UNLOCK(fdp);
701 error = closef(fp, td);
707 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
709 * Return status information about a file descriptor.
711 #ifndef _SYS_SYSPROTO_H_
724 register struct ofstat_args *uap;
732 if ((error = fget(td, uap->fd, &fp)) != 0)
734 error = fo_stat(fp, &ub, td);
737 error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
744 #endif /* COMPAT_43 || COMPAT_SUNOS */
747 * Return status information about a file descriptor.
749 #ifndef _SYS_SYSPROTO_H_
762 struct fstat_args *uap;
769 if ((error = fget(td, uap->fd, &fp)) != 0)
771 error = fo_stat(fp, &ub, td);
773 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
781 * Return status information about a file descriptor.
783 #ifndef _SYS_SYSPROTO_H_
796 register struct nfstat_args *uap;
804 if ((error = fget(td, uap->fd, &fp)) != 0)
806 error = fo_stat(fp, &ub, td);
809 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
818 * Return pathconf information about a file descriptor.
820 #ifndef _SYS_SYSPROTO_H_
821 struct fpathconf_args {
833 register struct fpathconf_args *uap;
839 if ((error = fget(td, uap->fd, &fp)) != 0)
842 switch (fp->f_type) {
845 if (uap->name != _PC_PIPE_BUF) {
848 td->td_retval[0] = PIPE_BUF;
854 vp = (struct vnode *)fp->f_data;
856 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
868 * Allocate a file descriptor for the process.
871 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
874 fdalloc(td, want, result)
879 struct proc *p = td->td_proc;
880 register struct filedesc *fdp = td->td_proc->p_fd;
882 int lim, last, nfiles;
883 struct file **newofile, **oldofile;
886 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
889 * Search for a free descriptor starting at the higher
890 * of want or fd_freefile. If that fails, consider
891 * expanding the ofile array.
893 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
895 last = min(fdp->fd_nfiles, lim);
896 if ((i = want) < fdp->fd_freefile)
897 i = fdp->fd_freefile;
898 for (; i < last; i++) {
899 if (fdp->fd_ofiles[i] == NULL) {
900 fdp->fd_ofileflags[i] = 0;
901 if (i > fdp->fd_lastfile)
902 fdp->fd_lastfile = i;
903 if (want <= fdp->fd_freefile)
904 fdp->fd_freefile = i;
911 * No space in current array. Expand?
913 if (fdp->fd_nfiles >= lim)
915 if (fdp->fd_nfiles < NDEXTENT)
918 nfiles = 2 * fdp->fd_nfiles;
919 FILEDESC_UNLOCK(fdp);
921 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
922 M_FILEDESC, M_WAITOK);
927 * deal with file-table extend race that might have occurred
928 * when malloc was blocked.
930 if (fdp->fd_nfiles >= nfiles) {
931 FILEDESC_UNLOCK(fdp);
933 FREE(newofile, M_FILEDESC);
938 newofileflags = (char *) &newofile[nfiles];
940 * Copy the existing ofile and ofileflags arrays
941 * and zero the new portion of each array.
943 bcopy(fdp->fd_ofiles, newofile,
944 (i = sizeof(struct file *) * fdp->fd_nfiles));
945 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
946 bcopy(fdp->fd_ofileflags, newofileflags,
947 (i = sizeof(char) * fdp->fd_nfiles));
948 bzero(newofileflags + i, nfiles * sizeof(char) - i);
949 if (fdp->fd_nfiles > NDFILE)
950 oldofile = fdp->fd_ofiles;
953 fdp->fd_ofiles = newofile;
954 fdp->fd_ofileflags = newofileflags;
955 fdp->fd_nfiles = nfiles;
957 if (oldofile != NULL) {
958 FILEDESC_UNLOCK(fdp);
960 FREE(oldofile, M_FILEDESC);
969 * Check to see whether n user file descriptors
970 * are available to the process p.
977 struct proc *p = td->td_proc;
978 register struct filedesc *fdp = td->td_proc->p_fd;
979 register struct file **fpp;
980 register int i, lim, last;
982 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
984 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
985 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
988 last = min(fdp->fd_nfiles, lim);
989 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
990 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
991 if (*fpp == NULL && --n <= 0)
998 * Create a new open file structure and allocate
999 * a file descriptor for the process that refers to it.
1002 falloc(td, resultfp, resultfd)
1003 register struct thread *td;
1004 struct file **resultfp;
1007 struct proc *p = td->td_proc;
1008 register struct file *fp, *fq;
1011 sx_xlock(&filelist_lock);
1012 if (nfiles >= maxfiles) {
1013 sx_xunlock(&filelist_lock);
1018 sx_xunlock(&filelist_lock);
1020 * Allocate a new file descriptor.
1021 * If the process has file descriptor zero open, add to the list
1022 * of open files at that point, otherwise put it at the front of
1023 * the list of open files.
1025 MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
1028 * wait until after malloc (which may have blocked) returns before
1029 * allocating the slot, else a race might have shrunk it if we had
1030 * allocated it before the malloc.
1032 FILEDESC_LOCK(p->p_fd);
1033 if ((error = fdalloc(td, 0, &i))) {
1034 FILEDESC_UNLOCK(p->p_fd);
1035 sx_xlock(&filelist_lock);
1037 sx_xunlock(&filelist_lock);
1041 fp->f_mtxp = mtx_pool_alloc();
1044 fp->f_cred = crhold(p->p_ucred);
1045 fp->f_ops = &badfileops;
1047 FILEDESC_UNLOCK(p->p_fd);
1048 sx_xlock(&filelist_lock);
1049 FILEDESC_LOCK(p->p_fd);
1050 if ((fq = p->p_fd->fd_ofiles[0])) {
1051 LIST_INSERT_AFTER(fq, fp, f_list);
1053 LIST_INSERT_HEAD(&filehead, fp, f_list);
1055 p->p_fd->fd_ofiles[i] = fp;
1056 FILEDESC_UNLOCK(p->p_fd);
1057 sx_xunlock(&filelist_lock);
1066 * Free a file descriptor.
1070 register struct file *fp;
1073 KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1074 sx_xlock(&filelist_lock);
1075 LIST_REMOVE(fp, f_list);
1077 sx_xunlock(&filelist_lock);
1083 * Build a new filedesc structure.
1089 register struct filedesc0 *newfdp;
1090 register struct filedesc *fdp = td->td_proc->p_fd;
1092 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1093 M_FILEDESC, M_WAITOK | M_ZERO);
1094 mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF);
1095 FILEDESC_LOCK(&newfdp->fd_fd);
1096 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1097 if (newfdp->fd_fd.fd_cdir)
1098 VREF(newfdp->fd_fd.fd_cdir);
1099 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1100 if (newfdp->fd_fd.fd_rdir)
1101 VREF(newfdp->fd_fd.fd_rdir);
1102 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1103 if (newfdp->fd_fd.fd_jdir)
1104 VREF(newfdp->fd_fd.fd_jdir);
1106 /* Create the file descriptor table. */
1107 newfdp->fd_fd.fd_refcnt = 1;
1108 newfdp->fd_fd.fd_cmask = cmask;
1109 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1110 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1111 newfdp->fd_fd.fd_nfiles = NDFILE;
1112 newfdp->fd_fd.fd_knlistsize = -1;
1113 FILEDESC_UNLOCK(&newfdp->fd_fd);
1115 return (&newfdp->fd_fd);
1119 * Share a filedesc structure.
1125 FILEDESC_LOCK(p->p_fd);
1126 p->p_fd->fd_refcnt++;
1127 FILEDESC_UNLOCK(p->p_fd);
1132 * Copy a filedesc structure.
1138 register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1139 register struct file **fpp;
1142 /* Certain daemons might not have file descriptors. */
1146 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1148 FILEDESC_UNLOCK(fdp);
1149 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1150 M_FILEDESC, M_WAITOK);
1152 bcopy(fdp, newfdp, sizeof(struct filedesc));
1153 FILEDESC_UNLOCK(fdp);
1154 bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1155 mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF);
1156 if (newfdp->fd_cdir)
1157 VREF(newfdp->fd_cdir);
1158 if (newfdp->fd_rdir)
1159 VREF(newfdp->fd_rdir);
1160 if (newfdp->fd_jdir)
1161 VREF(newfdp->fd_jdir);
1162 newfdp->fd_refcnt = 1;
1165 * If the number of open files fits in the internal arrays
1166 * of the open file structure, use them, otherwise allocate
1167 * additional memory for the number of descriptors currently
1171 newfdp->fd_lastfile = fdp->fd_lastfile;
1172 newfdp->fd_nfiles = fdp->fd_nfiles;
1173 if (newfdp->fd_lastfile < NDFILE) {
1174 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1175 newfdp->fd_ofileflags =
1176 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1180 * Compute the smallest multiple of NDEXTENT needed
1181 * for the file descriptors currently in use,
1182 * allowing the table to shrink.
1185 i = newfdp->fd_nfiles;
1186 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1188 FILEDESC_UNLOCK(fdp);
1189 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1190 M_FILEDESC, M_WAITOK);
1192 newfdp->fd_lastfile = fdp->fd_lastfile;
1193 newfdp->fd_nfiles = fdp->fd_nfiles;
1194 j = newfdp->fd_nfiles;
1195 while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1199 * The size of the original table has changed.
1200 * Go over once again.
1202 FILEDESC_UNLOCK(fdp);
1203 FREE(newfdp->fd_ofiles, M_FILEDESC);
1205 newfdp->fd_lastfile = fdp->fd_lastfile;
1206 newfdp->fd_nfiles = fdp->fd_nfiles;
1209 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1211 newfdp->fd_nfiles = i;
1212 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1213 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1216 * kq descriptors cannot be copied.
1218 if (newfdp->fd_knlistsize != -1) {
1219 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1220 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1221 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1223 if (i < newfdp->fd_freefile)
1224 newfdp->fd_freefile = i;
1226 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1227 newfdp->fd_lastfile--;
1229 newfdp->fd_knlist = NULL;
1230 newfdp->fd_knlistsize = -1;
1231 newfdp->fd_knhash = NULL;
1232 newfdp->fd_knhashmask = 0;
1235 fpp = newfdp->fd_ofiles;
1236 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1245 * Release a filedesc structure.
1251 register struct filedesc *fdp = td->td_proc->p_fd;
1255 /* Certain daemons might not have file descriptors. */
1260 if (--fdp->fd_refcnt > 0) {
1261 FILEDESC_UNLOCK(fdp);
1265 * we are the last reference to the structure, we can
1266 * safely assume it will not change out from under us.
1268 FILEDESC_UNLOCK(fdp);
1269 fpp = fdp->fd_ofiles;
1270 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1272 (void) closef(*fpp, td);
1274 if (fdp->fd_nfiles > NDFILE)
1275 FREE(fdp->fd_ofiles, M_FILEDESC);
1277 vrele(fdp->fd_cdir);
1279 vrele(fdp->fd_rdir);
1281 vrele(fdp->fd_jdir);
1283 FREE(fdp->fd_knlist, M_KQUEUE);
1285 FREE(fdp->fd_knhash, M_KQUEUE);
1286 mtx_destroy(&fdp->fd_mtx);
1287 FREE(fdp, M_FILEDESC);
1291 * For setugid programs, we don't want people to use that setugidness
1292 * to generate error messages which write to a file which would
1293 * otherwise be off-limits to the process.
1295 * This is a gross hack to plug the hole. A better solution would involve
1296 * a special vop or other form of generalized access control mechanism. We
1297 * go ahead and just reject all procfs file system accesses as dangerous.
1299 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1300 * sufficient. We also don't check for setugidness since we know we are.
1303 is_unsafe(struct file *fp)
1305 if (fp->f_type == DTYPE_VNODE &&
1306 ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1312 * Make this setugid thing safe, if at all possible.
1318 struct filedesc *fdp = td->td_proc->p_fd;
1321 /* Certain daemons might not have file descriptors. */
1326 * note: fdp->fd_ofiles may be reallocated out from under us while
1327 * we are blocked in a close. Be careful!
1330 for (i = 0; i <= fdp->fd_lastfile; i++) {
1333 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1337 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1338 (void) munmapfd(td, i);
1340 if (i < fdp->fd_knlistsize) {
1341 FILEDESC_UNLOCK(fdp);
1342 knote_fdclose(td, i);
1346 * NULL-out descriptor prior to close to avoid
1347 * a race while close blocks.
1349 fp = fdp->fd_ofiles[i];
1350 fdp->fd_ofiles[i] = NULL;
1351 fdp->fd_ofileflags[i] = 0;
1352 if (i < fdp->fd_freefile)
1353 fdp->fd_freefile = i;
1354 FILEDESC_UNLOCK(fdp);
1355 (void) closef(fp, td);
1359 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1361 FILEDESC_UNLOCK(fdp);
1365 * Close any files on exec?
1371 struct filedesc *fdp = td->td_proc->p_fd;
1374 /* Certain daemons might not have file descriptors. */
1381 * We cannot cache fd_ofiles or fd_ofileflags since operations
1382 * may block and rip them out from under us.
1384 for (i = 0; i <= fdp->fd_lastfile; i++) {
1385 if (fdp->fd_ofiles[i] != NULL &&
1386 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1390 if (fdp->fd_ofileflags[i] & UF_MAPPED)
1391 (void) munmapfd(td, i);
1393 if (i < fdp->fd_knlistsize) {
1394 FILEDESC_UNLOCK(fdp);
1395 knote_fdclose(td, i);
1399 * NULL-out descriptor prior to close to avoid
1400 * a race while close blocks.
1402 fp = fdp->fd_ofiles[i];
1403 fdp->fd_ofiles[i] = NULL;
1404 fdp->fd_ofileflags[i] = 0;
1405 if (i < fdp->fd_freefile)
1406 fdp->fd_freefile = i;
1407 FILEDESC_UNLOCK(fdp);
1408 (void) closef(fp, td);
1412 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1414 FILEDESC_UNLOCK(fdp);
1418 * Internal form of close.
1419 * Decrement reference count on file structure.
1420 * Note: td may be NULL when closing a file
1421 * that was being passed in a message.
1425 register struct file *fp;
1426 register struct thread *td;
1434 * POSIX record locking dictates that any close releases ALL
1435 * locks owned by this process. This is handled by setting
1436 * a flag in the unlock to free ONLY locks obeying POSIX
1437 * semantics, and not to free BSD-style file locks.
1438 * If the descriptor was in a message, POSIX-style locks
1439 * aren't passed with the descriptor.
1441 if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1442 fp->f_type == DTYPE_VNODE) {
1443 lf.l_whence = SEEK_SET;
1446 lf.l_type = F_UNLCK;
1447 vp = (struct vnode *)fp->f_data;
1448 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1449 F_UNLCK, &lf, F_POSIX);
1451 return (fdrop(fp, td));
1455 * Drop reference on struct file passed in, may call closef if the
1456 * reference hits zero.
1465 return (fdrop_locked(fp, td));
1469 * Extract the file pointer associated with the specified descriptor for
1470 * the current user process.
1472 * If the descriptor doesn't exist, EBADF is returned.
1474 * If the descriptor exists but doesn't match 'flags' then
1475 * return EBADF for read attempts and EINVAL for write attempts.
1477 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1478 * It should be dropped with fdrop().
1479 * If it is not set, then the refcount will not be bumped however the
1480 * thread's filedesc struct will be returned locked (for fgetsock).
1482 * If an error occurred the non-zero error is returned and *fpp is set to NULL.
1483 * Otherwise *fpp is set and zero is returned.
1487 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1489 struct filedesc *fdp;
1493 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1496 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1497 FILEDESC_UNLOCK(fdp);
1502 * Note: FREAD failures return EBADF to maintain backwards
1503 * compatibility with what routines returned before.
1505 * Only one flag, or 0, may be specified.
1507 if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1508 FILEDESC_UNLOCK(fdp);
1511 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1512 FILEDESC_UNLOCK(fdp);
1517 FILEDESC_UNLOCK(fdp);
1524 fget(struct thread *td, int fd, struct file **fpp)
1526 return(_fget(td, fd, fpp, 0, 1));
1530 fget_read(struct thread *td, int fd, struct file **fpp)
1532 return(_fget(td, fd, fpp, FREAD, 1));
1536 fget_write(struct thread *td, int fd, struct file **fpp)
1538 return(_fget(td, fd, fpp, FWRITE, 1));
1542 * Like fget() but loads the underlying vnode, or returns an error if
1543 * the descriptor does not represent a vnode. Note that pipes use vnodes
1544 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1545 * error). The returned vnode will be vref()d.
1550 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1556 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1558 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1561 *vpp = (struct vnode *)fp->f_data;
1564 FILEDESC_UNLOCK(td->td_proc->p_fd);
1569 fgetvp(struct thread *td, int fd, struct vnode **vpp)
1571 return(_fgetvp(td, fd, vpp, 0));
1575 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1577 return(_fgetvp(td, fd, vpp, FREAD));
1581 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1583 return(_fgetvp(td, fd, vpp, FWRITE));
1587 * Like fget() but loads the underlying socket, or returns an error if
1588 * the descriptor does not represent a socket.
1590 * We bump the ref count on the returned socket. XXX Also obtain the SX lock in
1594 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1602 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1604 if (fp->f_type != DTYPE_SOCKET) {
1607 *spp = (struct socket *)fp->f_data;
1609 *fflagp = fp->f_flag;
1612 FILEDESC_UNLOCK(td->td_proc->p_fd);
1617 * Drop the reference count on the socket and XXX release the SX lock in
1618 * the future. The last reference closes the socket.
1621 fputsock(struct socket *so)
1627 * Drop reference on struct file passed in, may call closef if the
1628 * reference hits zero.
1629 * Expects struct file locked, and will unlock it.
1632 fdrop_locked(fp, td)
1640 FILE_LOCK_ASSERT(fp, MA_OWNED);
1642 if (--fp->f_count > 0) {
1646 if (fp->f_count < 0)
1647 panic("fdrop: count < 0");
1648 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1649 lf.l_whence = SEEK_SET;
1652 lf.l_type = F_UNLCK;
1653 vp = (struct vnode *)fp->f_data;
1655 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1658 if (fp->f_ops != &badfileops)
1659 error = fo_close(fp, td);
1667 * Apply an advisory lock on a file descriptor.
1669 * Just attempt to get a record lock of the requested type on
1670 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1672 #ifndef _SYS_SYSPROTO_H_
1685 register struct flock_args *uap;
1692 if ((error = fget(td, uap->fd, &fp)) != 0)
1694 if (fp->f_type != DTYPE_VNODE) {
1696 return (EOPNOTSUPP);
1700 vp = (struct vnode *)fp->f_data;
1701 lf.l_whence = SEEK_SET;
1704 if (uap->how & LOCK_UN) {
1705 lf.l_type = F_UNLCK;
1707 fp->f_flag &= ~FHASLOCK;
1709 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1712 if (uap->how & LOCK_EX)
1713 lf.l_type = F_WRLCK;
1714 else if (uap->how & LOCK_SH)
1715 lf.l_type = F_RDLCK;
1721 fp->f_flag |= FHASLOCK;
1723 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1724 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1732 * File Descriptor pseudo-device driver (/dev/fd/).
1734 * Opening minor device N dup()s the file (if any) connected to file
1735 * descriptor N belonging to the calling process. Note that this driver
1736 * consists of only the ``open()'' routine, because all subsequent
1737 * references to this file will be direct to the other driver.
1741 fdopen(dev, mode, type, td)
1748 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1749 * file descriptor being sought for duplication. The error
1750 * return ensures that the vnode for this device will be released
1751 * by vn_open. Open will detect this special error and take the
1752 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1753 * will simply report the error.
1755 td->td_dupfd = dev2unit(dev);
1760 * Duplicate the specified descriptor to a free descriptor.
1763 dupfdopen(td, fdp, indx, dfd, mode, error)
1765 struct filedesc *fdp;
1770 register struct file *wfp;
1774 * If the to-be-dup'd fd number is greater than the allowed number
1775 * of file descriptors, or the fd to be dup'd has already been
1776 * closed, then reject.
1779 if ((u_int)dfd >= fdp->fd_nfiles ||
1780 (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1781 FILEDESC_UNLOCK(fdp);
1786 * There are two cases of interest here.
1788 * For ENODEV simply dup (dfd) to file descriptor
1789 * (indx) and return.
1791 * For ENXIO steal away the file structure from (dfd) and
1792 * store it in (indx). (dfd) is effectively closed by
1795 * Any other error code is just returned.
1800 * Check that the mode the file is being opened for is a
1801 * subset of the mode of the existing descriptor.
1804 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1806 FILEDESC_UNLOCK(fdp);
1809 fp = fdp->fd_ofiles[indx];
1811 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1812 (void) munmapfd(td, indx);
1814 fdp->fd_ofiles[indx] = wfp;
1815 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1818 if (indx > fdp->fd_lastfile)
1819 fdp->fd_lastfile = indx;
1822 FILEDESC_UNLOCK(fdp);
1824 * we now own the reference to fp that the ofiles[] array
1825 * used to own. Release it.
1828 fdrop_locked(fp, td);
1833 * Steal away the file pointer from dfd, and stuff it into indx.
1835 fp = fdp->fd_ofiles[indx];
1837 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1838 (void) munmapfd(td, indx);
1840 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1841 fdp->fd_ofiles[dfd] = NULL;
1842 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1843 fdp->fd_ofileflags[dfd] = 0;
1846 * Complete the clean up of the filedesc structure by
1847 * recomputing the various hints.
1849 if (indx > fdp->fd_lastfile) {
1850 fdp->fd_lastfile = indx;
1852 while (fdp->fd_lastfile > 0 &&
1853 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1856 if (dfd < fdp->fd_freefile)
1857 fdp->fd_freefile = dfd;
1861 FILEDESC_UNLOCK(fdp);
1864 * we now own the reference to fp that the ofiles[] array
1865 * used to own. Release it.
1868 fdrop_locked(fp, td);
1872 FILEDESC_UNLOCK(fdp);
1879 * Get file structures.
1882 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1887 sx_slock(&filelist_lock);
1890 * overestimate by 10 files
1892 error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1893 (nfiles + 10) * sizeof(struct file));
1894 sx_sunlock(&filelist_lock);
1898 error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1900 sx_sunlock(&filelist_lock);
1905 * followed by an array of file structures
1907 LIST_FOREACH(fp, &filehead, f_list) {
1908 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1910 sx_sunlock(&filelist_lock);
1914 sx_sunlock(&filelist_lock);
1918 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1919 0, 0, sysctl_kern_file, "S,file", "Entire file table");
1921 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1922 &maxfilesperproc, 0, "Maximum files allowed open per process");
1924 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1925 &maxfiles, 0, "Maximum number of files");
1927 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1928 &nfiles, 0, "System-wide number of open files");
1931 fildesc_drvinit(void *unused)
1935 dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1936 make_dev_alias(dev, "stdin");
1937 dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1938 make_dev_alias(dev, "stdout");
1939 dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1940 make_dev_alias(dev, "stderr");
1941 if (!devfs_present) {
1944 for (fd = 3; fd < NUMFDESC; fd++)
1945 make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1950 struct fileops badfileops = {
1961 badfo_readwrite(fp, uio, cred, flags, td)
1973 badfo_ioctl(fp, com, data, td)
1984 badfo_poll(fp, events, cred, td)
1995 badfo_kqfilter(fp, kn)
2004 badfo_stat(fp, sb, td)
2022 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2023 fildesc_drvinit,NULL)
2025 static void filelistinit __P((void *));
2026 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2033 sx_init(&filelist_lock, "filelist lock");