2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/capability.h>
40 * Functions that perform the vfs operations required by the routines in
41 * nfsd_serv.c. It is hoped that this change will make the server more
45 #include <fs/nfs/nfsport.h>
47 #include <sys/sysctl.h>
48 #include <nlm/nlm_prot.h>
51 FEATURE(nfsd, "NFSv4 server");
/*
 * File-scope state for the NFS server VFS glue: symbols defined in other
 * files (extern), globals defined here, and the tunables published under
 * the vfs.nfsd sysctl node.
 * NOTE(review): this extract omits some continuation lines (the tail of the
 * nfssvc_srvcall() prototype and of the commit_blks and commit_miss
 * SYSCTL_INT invocations), so those declarations appear truncated below.
 */
53 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
54 extern int nfsrv_useacl;
55 extern int newnfs_numnfsd;
56 extern struct mount nfsv4root_mnt;
57 extern struct nfsrv_stablefirst nfsrv_stablefirst;
58 extern void (*nfsd_call_servertimer)(void);
59 extern SVCPOOL *nfsrvd_pool;
60 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
62 struct mtx nfs_cache_mutex;
63 struct mtx nfs_v4root_mutex;
64 struct nfsrvfh nfs_rootfh, nfs_pubfh;
65 int nfs_pubfhset = 0, nfs_rootfhset = 0;
66 struct proc *nfsd_master_proc = NULL;
67 static pid_t nfsd_master_pid = (pid_t)-1;
68 static char nfsd_master_comm[MAXCOMLEN + 1];
69 static struct timeval nfsd_master_start;
70 static uint32_t nfsv4_sysid = 0;
72 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
75 int nfsrv_enable_crossmntpt = 1;
76 static int nfs_commit_blks;
77 static int nfs_commit_miss;
78 extern int nfsrv_issuedelegs;
79 extern int nfsrv_dolocallocks;
81 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server");
82 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
83 &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
84 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
86 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
88 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
89 &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
90 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
91 &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
/*
 * Read heuristic table consulted by nfsvno_read().
 * NUM_HEURISTIC is the number of slots in nfsheur[]; nfsvno_read() reduces
 * a value derived from the vnode pointer modulo this count to pick a slot.
 * NHUSE_MAX is the ceiling that nfsvno_read() clamps nh_use to.
 * NOTE(review): nfsvno_read() also uses NHUSE_INIT and NHUSE_INC, whose
 * definitions are not visible in this extract.
 */
93 #define NUM_HEURISTIC 1017
96 #define NHUSE_MAX 2048
98 static struct nfsheur {
99 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
100 off_t nh_nextr; /* next offset for sequential detection */
101 int nh_use; /* use count for selection */
102 int nh_seqcount; /* heuristic */
103 } nfsheur[NUM_HEURISTIC];
107 * Get attributes into nfsvattr structure.
/*
 * Fetch the attributes of vp into nvap->na_vattr with VOP_GETATTR().
 *
 * vp         - vnode to query.
 * nvap       - receives the attributes (na_vattr member).
 * cred       - credentials passed to VOP_GETATTR().
 * p          - calling thread; not referenced on the lines visible here.
 * vpislocked - when 0, a shared lock is taken on vp unless this thread
 *              already holds it exclusively.
 *
 * The value returned by VOP_GETATTR() is stored in error.
 * NOTE(review): the matching unlock and the return statement are not
 * visible in this extract; lockedit presumably records that the lock taken
 * here has to be dropped again -- confirm against the full source.
 */
110 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
111 struct thread *p, int vpislocked)
113 int error, lockedit = 0;
115 if (vpislocked == 0) {
117 * When vpislocked == 0, the vnode is either exclusively
118 * locked by this thread or not locked by this thread.
119 * As such, shared lock it, if not exclusively locked.
121 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
123 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
126 error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
135 * Get a file handle for a vnode.
/*
 * Build the file handle for vp in *fhp.
 *
 * The handle is zeroed first, then fh_fsid is copied from the f_fsid of
 * the mount that vp belongs to and fh_fid is filled in by VOP_VPTOFH().
 * The VOP_VPTOFH() result is stored in error.
 * p is not referenced on the lines visible here.
 */
138 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
142 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
143 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
144 error = VOP_VPTOFH(vp, &fhp->fh_fid);
151 * Perform access checking for vnodes obtained from file handles that would
152 * refer to files already opened by a Unix client. You cannot just use
153 * vn_writechk() and VOP_ACCESSX() for two reasons.
154 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
156 * 2 - The owner is to be given access irrespective of mode bits for some
157 * operations, so that processes that chmod after opening a file don't
/*
 * Access check for a vnode obtained from a file handle.
 *
 * vp             - vnode to check.
 * accmode        - requested access bits.
 * cred           - credentials of the requester.
 * exp            - export information, consulted for the read-only test.
 * p              - thread handed to VOP_ACCESSX().
 * override       - NFSACCCHK_NOOVERRIDE, or a mask of NFSACCCHK_ALLOWROOT
 *                  and NFSACCCHK_ALLOWOWNER.
 * vpislocked     - when 0, a shared lock is attempted on vp first.
 * supportedtypep - optional; NFSACCESS_DELETE is cleared through it on one
 *                  of the delete-probe outcomes.
 *
 * Stages visible in this extract:
 *  1. For VWRITE requests, a read-only export or an MNT_RDONLY mount leads
 *     to a switch on the vnode type, followed by a check involving
 *     VV_TEXT.
 *  2. VOP_ACCESSX() is called. If it fails and VDELETE or VDELETE_CHILD
 *     was requested, it is retried with VEXPLICIT_DENY to probe whether
 *     the delete test is supported; on directories the delete bits are
 *     then stripped and the check repeated.
 *  3. An EPERM or EACCES result may be overridden for uid 0
 *     (NFSACCCHK_ALLOWROOT) or for the file owner as reported by
 *     VOP_GETATTR() (NFSACCCHK_ALLOWOWNER).
 * NOTE(review): the switch cases, the statements executed by each branch,
 * the unlock and the return are not visible in this extract.
 */
161 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
162 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
163 u_int32_t *supportedtypep)
166 int error = 0, getret = 0;
168 if (vpislocked == 0) {
169 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
174 if (accmode & VWRITE) {
175 /* Just vn_writechk() changed to check rdonly */
177 * Disallow write attempts on read-only file systems;
178 * unless the file is a socket or a block or character
179 * device resident on the file system.
181 if (NFSVNO_EXRDONLY(exp) ||
182 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
183 switch (vp->v_type) {
193 * If there's shared text associated with
194 * the inode, try to free it up once. If
195 * we fail, we can't allow writing.
197 if ((vp->v_vflag & VV_TEXT) != 0 && error == 0)
207 * Should the override still be applied when ACLs are enabled?
209 error = VOP_ACCESSX(vp, accmode, cred, p);
210 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
212 * Try again with VEXPLICIT_DENY, to see if the test for
213 * deletion is supported.
215 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
217 if (vp->v_type == VDIR) {
218 accmode &= ~(VDELETE | VDELETE_CHILD);
220 error = VOP_ACCESSX(vp, accmode, cred, p);
221 } else if (supportedtypep != NULL) {
222 *supportedtypep &= ~NFSACCESS_DELETE;
228 * Allow certain operations for the owner (reads and writes
229 * on files that are already open).
231 if (override != NFSACCCHK_NOOVERRIDE &&
232 (error == EPERM || error == EACCES)) {
233 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
235 else if (override & NFSACCCHK_ALLOWOWNER) {
236 getret = VOP_GETATTR(vp, &vattr, cred);
237 if (getret == 0 && cred->cr_uid == vattr.va_uid)
250 * Set attribute(s) vnop.
/*
 * Apply the attributes in nvap->na_vattr to vp with VOP_SETATTR(), using
 * cred. The VOP result is stored in error.
 * p and exp are not referenced on the lines visible here.
 */
253 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
254 struct thread *p, struct nfsexstuff *exp)
258 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
264 * Set up nameidata for a lookup() call and do it
265 * For the cases where we are crossing mount points
266 * (looking up the public fh path or the v4 root path when
267 * not using a pseudo-root fs), set/release the Giant lock,
/*
 * Pathname lookup for the NFS server, starting at directory dp and using
 * the path already stored in ndp->ni_cnd.cn_pnbuf.
 *
 * nd       - request descriptor; nd_flag selects public-fh and NFSv4
 *            behaviour.
 * ndp      - nameidata describing the lookup; updated in place.
 * dp       - starting directory; a check that it is of type VDIR is made
 *            before the lookup is set up.
 * islocked - not referenced on the lines visible here.
 * exp      - export information; a read-only export sets RDONLY in
 *            cn_flags.
 * p        - not referenced on the lines visible here.
 * retdirp  - not referenced on the lines visible here.
 *
 * Behaviour visible in this extract:
 *  - For ND_PUBLOOKUP requests whose path starts with a slash, a path made
 *    only of slashes is rejected with NFSERR_ACCES.
 *  - Otherwise NOCROSSMOUNT is set when the request is not NFSv4, or when
 *    nfsrv_enable_crossmntpt is 0 and the file system is exported.
 *  - When no symbolic link is met (ISSYMLINK clear) the path buffer is
 *    released unless SAVENAME or SAVESTART is set, and the leaf is
 *    unlocked unless the caller asked for LOCKLEAF.
 *  - When a symbolic link is met, its contents are read with
 *    VOP_READLINK() into a namei_zone buffer, spliced in front of the
 *    remaining path, and ni_loopcnt is compared against MAXSYMLINKS.
 *    A combined length of MAXPATHLEN or more yields ENAMETOOLONG.
 *    A link target starting with a slash restarts at ni_rootdir.
 * NOTE(review): the lookup() call itself, the loop construct, the error
 * exits and most reference-count operations are not visible in this
 * extract, so ordering constraints cannot be confirmed from here.
 */
271 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
272 struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
273 struct vnode **retdirp)
275 struct componentname *cnp = &ndp->ni_cnd;
279 int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
280 int error = 0, crossmnt;
284 cnp->cn_nameptr = cnp->cn_pnbuf;
286 * Extract and set starting directory.
288 if (dp->v_type != VDIR) {
293 nfsvno_relpathbuf(ndp);
301 if (NFSVNO_EXRDONLY(exp))
302 cnp->cn_flags |= RDONLY;
303 ndp->ni_segflg = UIO_SYSSPACE;
306 if (nd->nd_flag & ND_PUBLOOKUP) {
308 if (cnp->cn_pnbuf[0] == '/') {
311 * Check for degenerate pathnames here, since lookup()
314 for (i = 1; i < ndp->ni_pathlen; i++)
315 if (cnp->cn_pnbuf[i] != '/')
317 if (i == ndp->ni_pathlen) {
318 error = NFSERR_ACCES;
324 } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
325 (nd->nd_flag & ND_NFSV4) == 0) {
327 * Only cross mount points for NFSv4 when doing a
328 * mount while traversing the file system above
329 * the mount point, unless nfsrv_enable_crossmntpt is set.
331 cnp->cn_flags |= NOCROSSMOUNT;
336 * Initialize for scan, set ni_startdir and bump ref on dp again
337 * becuase lookup() will dereference ni_startdir.
341 ndp->ni_startdir = dp;
342 ndp->ni_rootdir = rootvnode;
345 cnp->cn_flags |= LOCKLEAF;
347 cnp->cn_nameptr = cnp->cn_pnbuf;
349 * Call lookup() to do the real work. If an error occurs,
350 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
351 * we do not have to dereference anything before returning.
352 * In either case ni_startdir will be dereferenced and NULLed
360 * Check for encountering a symbolic link. Trivial
361 * termination occurs if no symlink encountered.
363 if ((cnp->cn_flags & ISSYMLINK) == 0) {
364 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
365 nfsvno_relpathbuf(ndp);
366 if (ndp->ni_vp && !lockleaf)
367 NFSVOPUNLOCK(ndp->ni_vp, 0);
374 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
375 NFSVOPUNLOCK(ndp->ni_dvp, 0);
376 if (!(nd->nd_flag & ND_PUBLOOKUP)) {
381 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
385 if (ndp->ni_pathlen > 1)
386 cp = uma_zalloc(namei_zone, M_WAITOK);
390 aiov.iov_len = MAXPATHLEN;
391 auio.uio_iov = &aiov;
394 auio.uio_rw = UIO_READ;
395 auio.uio_segflg = UIO_SYSSPACE;
397 auio.uio_resid = MAXPATHLEN;
398 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
401 if (ndp->ni_pathlen > 1)
402 uma_zfree(namei_zone, cp);
408 linklen = MAXPATHLEN - auio.uio_resid;
413 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
414 error = ENAMETOOLONG;
419 * Adjust or replace path
421 if (ndp->ni_pathlen > 1) {
422 NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
423 uma_zfree(namei_zone, cnp->cn_pnbuf);
426 cnp->cn_pnbuf[linklen] = '\0';
427 ndp->ni_pathlen += linklen;
430 * Cleanup refs for next loop and check if root directory
431 * should replace current directory. Normally ni_dvp
432 * becomes the new base directory and is cleaned up when
433 * we loop. Explicitly null pointers after invalidation
434 * to clarify operation.
439 if (cnp->cn_pnbuf[0] == '/') {
441 ndp->ni_dvp = ndp->ni_rootdir;
444 ndp->ni_startdir = ndp->ni_dvp;
448 cnp->cn_flags &= ~LOCKLEAF;
452 uma_zfree(namei_zone, cnp->cn_pnbuf);
455 ndp->ni_startdir = NULL;
456 cnp->cn_flags &= ~HASBUF;
457 } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
462 NFSEXITCODE2(error, nd);
467 * Set up a pathname buffer and return a pointer to it and, optionally
468 * set a hash pointer.
/*
 * Allocate the pathname buffer for a lookup.
 *
 * Sets NOMACCHECK and HASBUF in the component name flags, allocates
 * cn_pnbuf from namei_zone (M_WAITOK) and returns the buffer through
 * *bufpp.
 * NOTE(review): the handling of hashpp is not visible in this extract.
 */
471 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
473 struct componentname *cnp = &ndp->ni_cnd;
475 cnp->cn_flags |= (NOMACCHECK | HASBUF);
476 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
479 *bufpp = cnp->cn_pnbuf;
483 * Release the above path buffer, if not released by nfsvno_namei().
/*
 * Return the pathname buffer of ndp to namei_zone and clear HASBUF.
 * NOTE(review): the statement executed when HASBUF is already clear is not
 * visible in this extract; confirm whether that case panics or returns.
 */
486 nfsvno_relpathbuf(struct nameidata *ndp)
489 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
491 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
492 ndp->ni_cnd.cn_flags &= ~HASBUF;
496 * Readlink vnode op into an mbuf list.
/*
 * Read the target of symbolic link vp into an mbuf list.
 *
 * vp     - the symbolic link.
 * cred   - credentials for VOP_READLINK().
 * p      - not referenced on the lines visible here.
 * mpp, mpendp, lenp - outputs; the assignments to them are not visible in
 *          this extract.
 *
 * An mbuf list covering NFS_MAXPATHLEN bytes is built, with one iovec per
 * mbuf (the last mbuf is shortened so the total is exactly
 * NFS_MAXPATHLEN). After VOP_READLINK(), any unread residual is subtracted
 * from len and nfsrv_adj() trims the list to the NFSM_RNDUP()-rounded
 * length.
 * NOTE(review): mbuf allocation, list linkage and error cleanup are not
 * visible in this extract.
 */
499 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
500 struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
502 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
503 struct iovec *ivp = iv;
504 struct uio io, *uiop = &io;
505 struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
506 int i, len, tlen, error = 0;
510 while (len < NFS_MAXPATHLEN) {
513 mp->m_len = NFSMSIZ(mp);
520 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
521 mp->m_len = NFS_MAXPATHLEN - len;
522 len = NFS_MAXPATHLEN;
526 ivp->iov_base = mtod(mp, caddr_t);
527 ivp->iov_len = mp->m_len;
532 uiop->uio_iovcnt = i;
533 uiop->uio_offset = 0;
534 uiop->uio_resid = len;
535 uiop->uio_rw = UIO_READ;
536 uiop->uio_segflg = UIO_SYSSPACE;
538 error = VOP_READLINK(vp, uiop, cred);
544 if (uiop->uio_resid > 0) {
545 len -= uiop->uio_resid;
546 tlen = NFSM_RNDUP(len);
547 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
559 * Read vnode op call into mbuf list.
/*
 * Read up to cnt bytes from vp at offset off into a newly built mbuf list.
 *
 * vp          - vnode to read; VOP_READ() is called with IO_NODELOCKED.
 * off         - file offset of the read.
 * cnt         - number of bytes requested.
 * cred        - credentials for VOP_READ().
 * p           - not referenced on the lines visible here.
 * mpp, mpendp - outputs; the assignments to them are not visible in this
 *               extract.
 *
 * Steps visible in this extract:
 *  - A value derived from the vnode pointer selects a starting slot in
 *    nfsheur[]; following slots are probed, decrementing positive nh_use
 *    counts, and an entry not matching vp is re-initialised with
 *    NHUSE_INIT.
 *  - nh_seqcount is incremented, capped at IO_SEQMAX, when the read
 *    starts at offset 0 with a positive seqcount or at nh_nextr.
 *    nh_use is raised by NHUSE_INC and clamped to NHUSE_MAX.
 *  - nh_seqcount << IO_SEQSHIFT is folded into the ioflag passed to
 *    VOP_READ().
 *  - Space for NFSM_RNDUP(cnt) bytes is described by a MALLOC()ed iovec
 *    array, which is freed right after VOP_READ() returns.
 *  - cnt is reduced to the number of bytes actually read and nfsrv_adj()
 *    trims the list when the lengths differ.
 * NOTE(review): the probe loop construct, mbuf allocation and the error
 * path are not visible in this extract.
 */
562 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
563 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
569 int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
570 struct mbuf *m2 = NULL, *m3;
571 struct uio io, *uiop = &io;
575 * Calculate seqcount for heuristic
578 * Locate best candidate
581 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
585 if (nfsheur[hi].nh_vp == vp) {
589 if (nfsheur[hi].nh_use > 0)
590 --nfsheur[hi].nh_use;
591 hi = (hi + 1) % NUM_HEURISTIC;
592 if (nfsheur[hi].nh_use < nh->nh_use)
596 if (nh->nh_vp != vp) {
599 nh->nh_use = NHUSE_INIT;
607 * Calculate heuristic
610 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
611 if (++nh->nh_seqcount > IO_SEQMAX)
612 nh->nh_seqcount = IO_SEQMAX;
613 } else if (nh->nh_seqcount > 1) {
618 nh->nh_use += NHUSE_INC;
619 if (nh->nh_use > NHUSE_MAX)
620 nh->nh_use = NHUSE_MAX;
621 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
623 len = left = NFSM_RNDUP(cnt);
626 * Generate the mbuf list with the uio_iov ref. to it.
633 siz = min(M_TRAILINGSPACE(m), left);
642 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
644 uiop->uio_iov = iv2 = iv;
650 panic("nfsvno_read iov");
651 siz = min(M_TRAILINGSPACE(m), left);
653 iv->iov_base = mtod(m, caddr_t) + m->m_len;
662 uiop->uio_iovcnt = i;
663 uiop->uio_offset = off;
664 uiop->uio_resid = len;
665 uiop->uio_rw = UIO_READ;
666 uiop->uio_segflg = UIO_SYSSPACE;
667 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
668 FREE((caddr_t)iv2, M_TEMP);
674 tlen = len - uiop->uio_resid;
675 cnt = cnt < tlen ? cnt : tlen;
676 tlen = NFSM_RNDUP(cnt);
680 } else if (len != tlen || tlen != cnt)
681 nfsrv_adj(m3, len - tlen, tlen - cnt);
691 * Write vnode op from an mbuf list.
/*
 * Write data held in an mbuf list to vp.
 *
 * vp     - vnode to write; VOP_WRITE() is called with IO_NODELOCKED.
 * off    - file offset of the write.
 * retlen - number of bytes to write (becomes uio_resid).
 * cnt    - number of iovec entries allocated for the transfer.
 * stable - NFSWRITE_UNSTABLE selects a plain write; any other value adds
 *          IO_SYNC.
 * mp, cp - mbuf and position within it where the data starts.
 * cred   - credentials for VOP_WRITE().
 * p      - not referenced on the lines visible here.
 *
 * The iovec array is MALLOC()ed here and freed right after VOP_WRITE()
 * returns; the VOP result is stored in error.
 * NOTE(review): the loop that fills the iovec array from the mbuf list is
 * only partly visible in this extract.
 */
694 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
695 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
701 struct uio io, *uiop = &io;
703 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
705 uiop->uio_iov = iv = ivp;
706 uiop->uio_iovcnt = cnt;
707 i = mtod(mp, caddr_t) + mp->m_len - cp;
711 panic("nfsvno_write");
722 cp = mtod(mp, caddr_t);
726 if (stable == NFSWRITE_UNSTABLE)
727 ioflags = IO_NODELOCKED;
729 ioflags = (IO_SYNC | IO_NODELOCKED);
730 uiop->uio_resid = retlen;
731 uiop->uio_rw = UIO_WRITE;
732 uiop->uio_segflg = UIO_SYSSPACE;
734 uiop->uio_offset = off;
735 error = VOP_WRITE(vp, uiop, ioflags, cred);
736 FREE((caddr_t)iv, M_TEMP);
743 * Common code for creating a regular file (plus special files for V2).
/*
 * Create a regular file, or a special file for NFS V2, after a lookup.
 *
 * nd              - request descriptor; nd_repstat seeds error and
 *                   nd_cred is used for privilege checks and SETATTR.
 * ndp             - nameidata from the preceding lookup; ni_vp == NULL
 *                   means the name does not exist yet.
 * vpp             - the resulting vnode is reached through *vpp.
 * nvap            - attributes for the new object; na_type selects the
 *                   kind of object created.
 * exclusive_flagp - when *exclusive_flagp is set after a successful
 *                   VOP_CREATE(), it is cleared and the verifier is stored
 *                   in the access time with VOP_SETATTR().
 * cverf           - two-word create verifier.
 * rdev            - device number for special files.
 * p, exp          - passed to nfsvno_accchk() for the truncate case.
 *
 * Paths visible in this extract:
 *  - VREG and VSOCK are created with VOP_CREATE().
 *  - VCHR, VBLK and VFIFO are created with VOP_MKNOD(); a VCHR with rdev
 *    0xffffffff is turned into a VFIFO, and any non-FIFO type requires
 *    PRIV_VFS_MKNOD_DEV.
 *  - When the object already exists, or error is already set, the lookup
 *    is cleaned up; with no error and na_size set, write access is checked
 *    and the file is resized with a VOP_SETATTR() carrying only the size.
 * Every path releases ni_startdir and the path buffer.
 * NOTE(review): vnode release calls on ni_dvp and the return statement are
 * not visible in this extract.
 */
746 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
747 struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
748 int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
753 error = nd->nd_repstat;
754 if (!error && ndp->ni_vp == NULL) {
755 if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
756 vrele(ndp->ni_startdir);
757 error = VOP_CREATE(ndp->ni_dvp,
758 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
760 nfsvno_relpathbuf(ndp);
762 if (*exclusive_flagp) {
763 *exclusive_flagp = 0;
764 NFSVNO_ATTRINIT(nvap);
765 nvap->na_atime.tv_sec = cverf[0];
766 nvap->na_atime.tv_nsec = cverf[1];
767 error = VOP_SETATTR(ndp->ni_vp,
768 &nvap->na_vattr, nd->nd_cred);
772 * NFS V2 Only. nfsrvd_mknod() does this for V3.
773 * (This implies, just get out on an error.)
775 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
776 nvap->na_type == VFIFO) {
777 if (nvap->na_type == VCHR && rdev == 0xffffffff)
778 nvap->na_type = VFIFO;
779 if (nvap->na_type != VFIFO &&
780 (error = priv_check_cred(nd->nd_cred,
781 PRIV_VFS_MKNOD_DEV, 0))) {
782 vrele(ndp->ni_startdir);
783 nfsvno_relpathbuf(ndp);
787 nvap->na_rdev = rdev;
788 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
789 &ndp->ni_cnd, &nvap->na_vattr);
791 nfsvno_relpathbuf(ndp);
792 vrele(ndp->ni_startdir);
796 vrele(ndp->ni_startdir);
797 nfsvno_relpathbuf(ndp);
805 * Handle cases where error is already set and/or
807 * 1 - clean up the lookup
808 * 2 - iff !error and na_size set, truncate it
810 vrele(ndp->ni_startdir);
811 nfsvno_relpathbuf(ndp);
813 if (ndp->ni_dvp == *vpp)
817 if (!error && nvap->na_size != VNOVAL) {
818 error = nfsvno_accchk(*vpp, VWRITE,
819 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
820 NFSACCCHK_VPISLOCKED, NULL);
822 tempsize = nvap->na_size;
823 NFSVNO_ATTRINIT(nvap);
824 nvap->na_size = tempsize;
825 error = VOP_SETATTR(*vpp,
826 &nvap->na_vattr, nd->nd_cred);
839 * Do a mknod vnode op.
/*
 * Create a special file described by nvap after a lookup.
 *
 * ndp  - nameidata from the preceding lookup.
 * nvap - attributes of the new node; na_type must be VCHR, VBLK, VSOCK or
 *        VFIFO, otherwise NFSERR_BADTYPE is set.
 * cred - credentials checked for PRIV_VFS_MKNOD_DEV when the type is not
 *        VFIFO.
 *
 * One branch creates the node with VOP_CREATE(); the other performs the
 * privilege check and then calls VOP_MKNOD(). Every path releases
 * ni_startdir and the path buffer.
 * NOTE(review): the remainder of the parameter list, the test for an
 * already existing name and the condition selecting VOP_CREATE() over
 * VOP_MKNOD() are not visible in this extract.
 */
842 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
848 vtyp = nvap->na_type;
850 * Iff doesn't exist, create it.
853 vrele(ndp->ni_startdir);
854 nfsvno_relpathbuf(ndp);
860 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
861 vrele(ndp->ni_startdir);
862 nfsvno_relpathbuf(ndp);
864 error = NFSERR_BADTYPE;
868 vrele(ndp->ni_startdir);
869 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
870 &ndp->ni_cnd, &nvap->na_vattr);
872 nfsvno_relpathbuf(ndp);
874 if (nvap->na_type != VFIFO &&
875 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
876 vrele(ndp->ni_startdir);
877 nfsvno_relpathbuf(ndp);
881 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
882 &ndp->ni_cnd, &nvap->na_vattr);
884 nfsvno_relpathbuf(ndp);
885 vrele(ndp->ni_startdir);
887 * Since VOP_MKNOD returns the ni_vp, I can't
888 * see any reason to do the lookup.
/*
 * Create a directory after a lookup.
 *
 * When the lookup found an existing object (ni_vp != NULL) the lookup
 * state is cleaned up and the path buffer released; otherwise VOP_MKDIR()
 * is called on ni_dvp and the path buffer is released afterwards.
 * saved_uid, cred, p and exp are not referenced on the lines visible here.
 * NOTE(review): the error value used for the already-exists case and the
 * vnode release calls are not visible in this extract.
 */
901 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
902 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
906 if (ndp->ni_vp != NULL) {
907 if (ndp->ni_dvp == ndp->ni_vp)
912 nfsvno_relpathbuf(ndp);
916 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
919 nfsvno_relpathbuf(ndp);
/*
 * Create a symbolic link after a lookup.
 *
 * ndp     - nameidata from the preceding lookup.
 * nvap    - attributes for the new link.
 * pathcp  - link target handed to VOP_SYMLINK().
 * not_v2  - zero for NFS V2 requests; the final test on the visible lines
 *           applies only when this is zero and no error occurred.
 * pathlen, saved_uid, cred, p, exp - not referenced on the lines visible
 *           here.
 *
 * Both the early cleanup path and the path following VOP_SYMLINK() release
 * ni_startdir and the path buffer.
 * NOTE(review): the condition selecting the early cleanup path and the
 * statement executed for the V2 case are not visible in this extract.
 */
930 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
931 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
932 struct nfsexstuff *exp)
937 vrele(ndp->ni_startdir);
938 nfsvno_relpathbuf(ndp);
939 if (ndp->ni_dvp == ndp->ni_vp)
948 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
949 &nvap->na_vattr, pathcp);
951 vrele(ndp->ni_startdir);
952 nfsvno_relpathbuf(ndp);
954 * Although FreeBSD still had the lookup code in
955 * it for 7/current, there doesn't seem to be any
956 * point, since VOP_SYMLINK() returns the ni_vp.
957 * Just vput it for v2.
959 if (!not_v2 && !error)
968 * Parse symbolic link arguments.
969 * This function has an ugly side effect. It will MALLOC() an area for
970 * the symlink and set iov_base to point to it, only if it succeeds.
971 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
/*
 * Parse the symbolic link arguments of a request.
 *
 * nd      - request descriptor being dissected.
 * nvap    - receives attributes: via nfsrv_sattr() for NFSv3, or the mode
 *           from the trailing nfsv2_sattr for NFSv2.
 * p       - passed to nfsrv_sattr().
 * pathcpp, lenp - outputs; the assignments to them are not visible in this
 *           extract.
 *
 * The target length is read from the request and rejected when it exceeds
 * NFS_MAXPATHLEN or is not positive. A buffer of len + 1 bytes is then
 * MALLOC()ed and filled by nfsrv_mtostr(). The error exit frees that
 * buffer.
 * NOTE(review): the error code chosen for a bad length and the labels
 * separating the success and error exits are not visible in this extract.
 */
975 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
976 struct thread *p, char **pathcpp, int *lenp)
981 struct nfsv2_sattr *sp;
985 if ((nd->nd_flag & ND_NFSV3) &&
986 (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
988 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
989 len = fxdr_unsigned(int, *tl);
990 if (len > NFS_MAXPATHLEN || len <= 0) {
994 MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
995 error = nfsrv_mtostr(nd, pathcp, len);
998 if (nd->nd_flag & ND_NFSV2) {
999 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1000 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1004 NFSEXITCODE2(0, nd);
1008 free(pathcp, M_TEMP);
1009 NFSEXITCODE2(error, nd);
1014 * Remove a non-directory object.
/*
 * Remove a non-directory object found by a lookup.
 *
 * A vnode of type VDIR yields NFSERR_ISDIR. Otherwise nfsrv_checkremove()
 * may be consulted before VOP_REMOVE() is called on the parent ni_dvp.
 * cred and exp are not referenced on the lines visible here.
 * NOTE(review): the conditions guarding nfsrv_checkremove() and
 * VOP_REMOVE() (presumably is_v4 and a clear error) and the vnode release
 * calls are not visible in this extract -- confirm.
 */
1017 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1018 struct thread *p, struct nfsexstuff *exp)
1024 if (vp->v_type == VDIR)
1025 error = NFSERR_ISDIR;
1027 error = nfsrv_checkremove(vp, 1, p);
1029 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1030 if (ndp->ni_dvp == vp)
1040 * Remove a directory.
/*
 * Remove a directory found by a lookup.
 *
 * Three preconditions are tested before VOP_RMDIR() is called on the
 * parent ni_dvp: the vnode must be of type VDIR, it must not be the same
 * vnode as its parent, and it must not carry VV_ROOT.
 * is_v4, cred, p and exp are not referenced on the lines visible here.
 * NOTE(review): the error codes assigned by the three tests and the vnode
 * release calls are not visible in this extract.
 */
1043 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1044 struct thread *p, struct nfsexstuff *exp)
1050 if (vp->v_type != VDIR) {
1055 * No rmdir "." please.
1057 if (ndp->ni_dvp == vp) {
1062 * The root of a mounted filesystem cannot be deleted.
1064 if (vp->v_vflag & VV_ROOT)
1068 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1069 if (ndp->ni_dvp == vp)
/*
 * Rename the object described by fromndp to the name described by tondp.
 *
 * fromndp, tondp - nameidata of the source and target lookups.
 * ndstat         - not referenced on the lines visible here.
 * ndflag         - request flags; ND_NFSV2 selects the V2 error codes and
 *                  ND_NFSV4 selects the NFSv4 state checks.
 * cred           - not referenced on the lines visible here.
 * p              - thread passed to the state and delegation helpers.
 *
 * Checks visible in this extract, with V2 / non-V2 error codes:
 *  - directory onto non-directory: EISDIR / EEXIST
 *  - non-directory onto directory: ENOTDIR / EEXIST
 *  - target directory is mounted on: ENOTEMPTY / EXDEV
 *  - target name is dot or dot-dot: rejected
 *  - source directory is mounted on: ENOTEMPTY / EXDEV
 *  - source and target directory on different mounts: ENOTEMPTY / EXDEV
 *  - one further condition: ENOTEMPTY / EINVAL
 * For NFSv4, nfsrv_checkremove() is applied to the source under an
 * exclusive lock, and to the target. For V2 and V3,
 * nfsd_recalldelegation() is called on the source. VOP_RENAME() then does
 * the work, and ni_startdir and the path buffer of both nameidata
 * structures are released.
 * NOTE(review): the branch structure, the goto targets, the condition for
 * the EINVAL case and the handling of identical source and target are not
 * visible in this extract.
 */
1082 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1083 u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1085 struct vnode *fvp, *tvp, *tdvp;
1088 fvp = fromndp->ni_vp;
1090 vrele(fromndp->ni_dvp);
1095 tdvp = tondp->ni_dvp;
1098 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1099 error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1101 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1102 error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1105 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1106 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1111 * A rename to '.' or '..' results in a prematurely
1112 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1115 if ((tondp->ni_cnd.cn_namelen == 1 &&
1116 tondp->ni_cnd.cn_nameptr[0] == '.') ||
1117 (tondp->ni_cnd.cn_namelen == 2 &&
1118 tondp->ni_cnd.cn_nameptr[0] == '.' &&
1119 tondp->ni_cnd.cn_nameptr[1] == '.')) {
1124 if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1125 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1128 if (fvp->v_mount != tdvp->v_mount) {
1129 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1133 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1138 * If source and destination are the same, there is nothing to
1139 * do. Set error to -1 to indicate this.
1144 if (ndflag & ND_NFSV4) {
1145 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
1146 error = nfsrv_checkremove(fvp, 0, p);
1147 NFSVOPUNLOCK(fvp, 0);
1151 error = nfsrv_checkremove(tvp, 1, p);
1154 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1155 * that the NFSv4 client won't be confused by the rename.
1156 * Since nfsd_recalldelegation() can only be called on an
1157 * unlocked vnode at this point and fvp is the file that will
1158 * still exist after the rename, just do fvp.
1160 nfsd_recalldelegation(fvp, p);
1164 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1165 &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1174 vrele(fromndp->ni_dvp);
1179 vrele(tondp->ni_startdir);
1180 nfsvno_relpathbuf(tondp);
1182 vrele(fromndp->ni_startdir);
1183 nfsvno_relpathbuf(fromndp);
/*
 * Create a hard link to vp under the name described by ndp.
 *
 * The mount of vp is compared with the mount of a second vnode xp before
 * linking. vp is then locked exclusively and VOP_LINK() is called unless
 * the vnode has been marked VI_DOOMED; vp is unlocked afterwards and the
 * path buffer released.
 * cred, p and exp are not referenced on the lines visible here.
 * NOTE(review): the assignment of xp, the error codes for the cross-mount
 * and doomed cases and the vnode release calls are not visible in this
 * extract.
 */
1192 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1193 struct thread *p, struct nfsexstuff *exp)
1203 if (vp->v_mount != xp->v_mount)
1207 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1208 if ((vp->v_iflag & VI_DOOMED) == 0)
1209 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1212 if (ndp->ni_dvp == vp)
1216 NFSVOPUNLOCK(vp, 0);
1218 if (ndp->ni_dvp == ndp->ni_vp)
1225 nfsvno_relpathbuf(ndp);
1231 * Do the fsync() appropriate for the commit.
/*
 * Flush data of vp to stable storage on behalf of a commit request.
 *
 * vp   - vnode to flush.
 * off  - starting byte offset of the range.
 * cnt  - length of the range in bytes.
 * cred - not referenced on the lines visible here.
 *
 * Two strategies are visible in this extract:
 *  - cnt greater than MAX_COMMIT_COUNT: the whole file is flushed. Dirty
 *    pages of the VM object are cleaned when OBJ_MIGHTBEDIRTY is set,
 *    then VOP_FSYNC() is called with MNT_WAIT.
 *  - otherwise: off is aligned down to the f_iosize boundary and then to
 *    the page boundary, with cnt grown to compensate; the pages in
 *    [off, off + cnt) are cleaned; then buffers are looked up block by
 *    block with gbincore(), locked with BUF_LOCK() (retrying on ENOLCK)
 *    and examined for B_DELWRI and B_INVAL.
 * NOTE(review): the remainder of the parameter list, the block loop
 * construct and the buffer write call are not visible in this extract.
 * NOTE(review): RFC 1813 defines a COMMIT count of 0 as a flush from
 * offset to end of file; the visible test only diverts counts above
 * MAX_COMMIT_COUNT to the whole-file path -- confirm how a count of 0 is
 * handled in the full source.
 */
1234 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1239 if (cnt > MAX_COMMIT_COUNT) {
1241 * Give up and do the whole thing
1244 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1245 VM_OBJECT_LOCK(vp->v_object);
1246 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1247 VM_OBJECT_UNLOCK(vp->v_object);
1249 error = VOP_FSYNC(vp, MNT_WAIT, td);
1252 * Locate and synchronously write any buffers that fall
1253 * into the requested range. Note: we are assuming that
1254 * f_iosize is a power of 2.
1256 int iosize = vp->v_mount->mnt_stat.f_iosize;
1257 int iomask = iosize - 1;
1262 * Align to iosize boundry, super-align to page boundry.
1265 cnt += off & iomask;
1266 off &= ~(u_quad_t)iomask;
1268 if (off & PAGE_MASK) {
1269 cnt += off & PAGE_MASK;
1270 off &= ~(u_quad_t)PAGE_MASK;
1272 lblkno = off / iosize;
1275 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1276 VM_OBJECT_LOCK(vp->v_object);
1277 vm_object_page_clean(vp->v_object, off, off + cnt,
1279 VM_OBJECT_UNLOCK(vp->v_object);
1288 * If we have a buffer and it is marked B_DELWRI we
1289 * have to lock and write it. Otherwise the prior
1290 * write is assumed to have already been committed.
1292 * gbincore() can return invalid buffers now so we
1293 * have to check that bit as well (though B_DELWRI
1294 * should not be set if B_INVAL is set there could be
1295 * a race here since we haven't locked the buffer).
1297 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1298 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1299 LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1301 continue; /* retry */
1303 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1306 bp->b_flags &= ~B_ASYNC;
/*
 * Fetch file system statistics for the mount containing vp into *sf with
 * VFS_STATFS().
 *
 * Negative f_bavail and f_ffree values are detected afterwards because NFS
 * carries these fields as unsigned quantities on the wire.
 * NOTE(review): the assignments performed for the negative cases are not
 * visible in this extract; the existing comment states they are set to 0.
 */
1329 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1333 error = VFS_STATFS(vp->v_mount, sf);
1336 * Since NFS handles these values as unsigned on the
1337 * wire, there is no way to represent negative values,
1338 * so set them to 0. Without this, they will appear
1339 * to be very large positive values for clients like
1342 if (sf->f_bavail < 0)
1344 if (sf->f_ffree < 0)
1352 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1353 * must handle nfsrv_opencheck() calls after any other access checks.
/*
 * Vnode operations for an NFSv4 Open, combined with the open state checks.
 *
 * nd              - request descriptor; nd_repstat carries the status in
 *                   and out.
 * ndp             - nameidata from the preceding lookup.
 * clientid, stateidp, stp - open state handed to nfsrv_opencheck().
 * exclusive_flagp - when set after a successful create, it is cleared and
 *                   the verifier is stored in the access time.
 * nvap            - attributes for create or truncate.
 * cverf           - two-word create verifier.
 * create          - NFSV4OPEN_CREATE selects the create handling.
 * aclp, attrbitp  - passed to nfsrv_fixattr() after a create.
 * cred            - credentials for the VOP calls and access check.
 * p               - calling thread.
 * exp             - export information passed to nfsrv_fixattr().
 * vpp             - output; its assignment is not visible in this extract.
 *
 * Paths visible in this extract:
 *  - Name does not exist: nfsrv_opencheck() runs with a NULL vnode, then
 *    VOP_CREATE() creates the file, the exclusive verifier is stored when
 *    requested, and nfsrv_fixattr() is called.
 *  - Name exists, size set and type VREG: write access is checked against
 *    a local nfsexstuff that mirrors the RDONLY lookup flag, then
 *    nfsrv_opencheck() runs and the file is resized with VOP_SETATTR().
 *  - Name exists, type VREG, no size: only nfsrv_opencheck() runs.
 *  - On the remaining path the path buffer is released when HASBUF is set
 *    and, for creates, ni_startdir is released.
 * NOTE(review): the assignment of vp, the else arms and the vnode release
 * calls are not visible in this extract.
 */
1356 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1357 nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1358 int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1359 NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1360 struct nfsexstuff *exp, struct vnode **vpp)
1362 struct vnode *vp = NULL;
1364 struct nfsexstuff nes;
1366 if (ndp->ni_vp == NULL)
1367 nd->nd_repstat = nfsrv_opencheck(clientid,
1368 stateidp, stp, NULL, nd, p, nd->nd_repstat);
1369 if (!nd->nd_repstat) {
1370 if (ndp->ni_vp == NULL) {
1371 vrele(ndp->ni_startdir);
1372 nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1373 &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1375 nfsvno_relpathbuf(ndp);
1376 if (!nd->nd_repstat) {
1377 if (*exclusive_flagp) {
1378 *exclusive_flagp = 0;
1379 NFSVNO_ATTRINIT(nvap);
1380 nvap->na_atime.tv_sec = cverf[0];
1381 nvap->na_atime.tv_nsec = cverf[1];
1382 nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1383 &nvap->na_vattr, cred);
1385 nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1386 aclp, p, attrbitp, exp);
1391 if (ndp->ni_startdir)
1392 vrele(ndp->ni_startdir);
1393 nfsvno_relpathbuf(ndp);
1395 if (create == NFSV4OPEN_CREATE) {
1396 if (ndp->ni_dvp == vp)
1401 if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1402 if (ndp->ni_cnd.cn_flags & RDONLY)
1403 NFSVNO_SETEXRDONLY(&nes);
1405 NFSVNO_EXINIT(&nes);
1406 nd->nd_repstat = nfsvno_accchk(vp,
1407 VWRITE, cred, &nes, p,
1408 NFSACCCHK_NOOVERRIDE,
1409 NFSACCCHK_VPISLOCKED, NULL);
1410 nd->nd_repstat = nfsrv_opencheck(clientid,
1411 stateidp, stp, vp, nd, p, nd->nd_repstat);
1412 if (!nd->nd_repstat) {
1413 tempsize = nvap->na_size;
1414 NFSVNO_ATTRINIT(nvap);
1415 nvap->na_size = tempsize;
1416 nd->nd_repstat = VOP_SETATTR(vp,
1417 &nvap->na_vattr, cred);
1419 } else if (vp->v_type == VREG) {
1420 nd->nd_repstat = nfsrv_opencheck(clientid,
1421 stateidp, stp, vp, nd, p, nd->nd_repstat);
1425 if (ndp->ni_cnd.cn_flags & HASBUF)
1426 nfsvno_relpathbuf(ndp);
1427 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1428 vrele(ndp->ni_startdir);
1429 if (ndp->ni_dvp == ndp->ni_vp)
1439 NFSEXITCODE2(0, nd);
1443 * Updates the file rev and sets the mtime and ctime
1444 * to the current clock time, returning the va_filerev and va_Xtime
/*
 * Stamp vp with the current time and read the resulting attributes back.
 *
 * The current time from getnanotime() is stored in va.va_mtime and applied
 * with VOP_SETATTR(); nfsvno_getattr() then refreshes *nvap, telling it
 * that the vnode is already locked (last argument 1). Both results are
 * deliberately discarded through the void casts.
 * NOTE(review): the declaration and initialisation of va are not visible
 * in this extract.
 */
1448 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1449 struct ucred *cred, struct thread *p)
1454 getnanotime(&va.va_mtime);
1455 (void) VOP_SETATTR(vp, &va, cred);
1456 (void) nfsvno_getattr(vp, nvap, cred, p, 1);
1460 * Glue routine to nfsv4_fillattr().
/*
 * Adapter that forwards to nfsv4_fillattr().
 *
 * The arguments are passed through in order, except that NULL is supplied
 * as the fourth argument and the vattr embedded in nvap (na_vattr) is
 * passed in place of nvap itself. The result is stored in error.
 * NOTE(review): the final argument line of the call and the return
 * statement are not visible in this extract.
 */
1463 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
1464 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1465 struct ucred *cred, struct thread *p, int isdgram, int reterr,
1466 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
1470 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
1471 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
1473 NFSEXITCODE2(0, nd);
1477 /* Since the Readdir vnode ops vary, put the entire functions in here. */
1479 * nfs readdir service
1480 * - mallocs what it thinks is enough to read
1481 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1482 * - calls VOP_READDIR()
1483 * - loops around building the reply
1484 * if the output generated exceeds count break out of loop
1485 * The NFSM_CLGET macro is used here so that the reply will be packed
1486 * tightly in mbuf clusters.
1487 * - it trims out records with d_fileno == 0
1488 * this doesn't matter for Unix clients, but they might confuse clients
1490 * - it trims out records with d_type == DT_WHT
1491 * these cannot be seen through NFS (unless we extend the protocol)
1492 * The alternate call nfsrvd_readdirplus() does lookups as well.
1493 * PS: The NFS protocol spec. does not clarify what the "count" byte
1494 * argument is a count of.. just name strings and file id's or the
1495 * entire reply rpc or ...
1496 * I tried just file name and id sizes and it confused the Sun client,
1497 * so I am using the full rpc size now. The "paranoia.." comment refers
1498 * to including the status longwords that are not a part of the dir.
1499 * "entry" structures, but are in the rpc.
1502 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1503 struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1508 char *cpos, *cend, *rbuf;
1510 int nlen, error = 0, getret = 1;
1511 int siz, cnt, fullsiz, eofflag, ncookies;
1512 u_int64_t off, toff, verf;
1513 u_long *cookies = NULL, *cookiep;
1518 if (nd->nd_repstat) {
1519 nfsrv_postopattr(nd, getret, &at);
1522 if (nd->nd_flag & ND_NFSV2) {
1523 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1524 off = fxdr_unsigned(u_quad_t, *tl++);
1526 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1527 off = fxdr_hyper(tl);
1529 verf = fxdr_hyper(tl);
1533 cnt = fxdr_unsigned(int, *tl);
1534 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1535 cnt = NFS_SRVMAXDATA(nd);
1536 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1538 if (nd->nd_flag & ND_NFSV3) {
1539 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1543 * va_filerev is not sufficient as a cookie verifier,
1544 * since it is not supposed to change when entries are
1545 * removed/added unless that offset cookies returned to
1546 * the client are no longer valid.
1548 if (!nd->nd_repstat && toff && verf != at.na_filerev)
1549 nd->nd_repstat = NFSERR_BAD_COOKIE;
1552 if (nd->nd_repstat == 0 && cnt == 0) {
1553 if (nd->nd_flag & ND_NFSV2)
1554 /* NFSv2 does not have NFSERR_TOOSMALL */
1555 nd->nd_repstat = EPERM;
1557 nd->nd_repstat = NFSERR_TOOSMALL;
1559 if (!nd->nd_repstat)
1560 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1561 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1562 NFSACCCHK_VPISLOCKED, NULL);
1563 if (nd->nd_repstat) {
1565 if (nd->nd_flag & ND_NFSV3)
1566 nfsrv_postopattr(nd, getret, &at);
1569 not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1570 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1574 free((caddr_t)cookies, M_TEMP);
1582 io.uio_offset = (off_t)off;
1584 io.uio_segflg = UIO_SYSSPACE;
1585 io.uio_rw = UIO_READ;
1587 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1589 off = (u_int64_t)io.uio_offset;
1591 siz -= io.uio_resid;
1593 if (!cookies && !nd->nd_repstat)
1594 nd->nd_repstat = NFSERR_PERM;
1595 if (nd->nd_flag & ND_NFSV3) {
1596 getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1597 if (!nd->nd_repstat)
1598 nd->nd_repstat = getret;
1602 * Handles the failed cases. nd->nd_repstat == 0 past here.
1604 if (nd->nd_repstat) {
1606 free((caddr_t)rbuf, M_TEMP);
1608 free((caddr_t)cookies, M_TEMP);
1609 if (nd->nd_flag & ND_NFSV3)
1610 nfsrv_postopattr(nd, getret, &at);
1614 * If nothing read, return eof
1619 if (nd->nd_flag & ND_NFSV2) {
1620 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1622 nfsrv_postopattr(nd, getret, &at);
1623 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1624 txdr_hyper(at.na_filerev, tl);
1627 *tl++ = newnfs_false;
1629 FREE((caddr_t)rbuf, M_TEMP);
1630 FREE((caddr_t)cookies, M_TEMP);
1635 * Check for degenerate cases of nothing useful read.
1636 * If so go try again
1640 dp = (struct dirent *)cpos;
1644 * For some reason FreeBSD's ufs_readdir() chooses to back the
1645 * directory offset up to a block boundary, so it is necessary to
1646 * skip over the records that precede the requested offset. This
1647 * requires the assumption that file offset cookies monotonically
1649 * Since the offset cookies don't monotonically increase for ZFS,
1650 * this is not done when ZFS is the file system.
1652 while (cpos < cend && ncookies > 0 &&
1653 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1654 (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
1655 cpos += dp->d_reclen;
1656 dp = (struct dirent *)cpos;
1660 if (cpos >= cend || ncookies == 0) {
1668 * dirlen is the size of the reply, including all XDR and must
1669 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1670 * if the XDR should be included in "count", but to be safe, we do.
1671 * (Include the two booleans at the end of the reply in dirlen now.)
1673 if (nd->nd_flag & ND_NFSV3) {
1674 nfsrv_postopattr(nd, getret, &at);
1675 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1676 txdr_hyper(at.na_filerev, tl);
1677 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1679 dirlen = 2 * NFSX_UNSIGNED;
1682 /* Loop through the records and build reply */
1683 while (cpos < cend && ncookies > 0) {
1684 nlen = dp->d_namlen;
1685 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1686 nlen <= NFS_MAXNAMLEN) {
1687 if (nd->nd_flag & ND_NFSV3)
1688 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1690 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1697 * Build the directory record xdr from
1700 if (nd->nd_flag & ND_NFSV3) {
1701 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1702 *tl++ = newnfs_true;
1705 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1706 *tl++ = newnfs_true;
1708 *tl = txdr_unsigned(dp->d_fileno);
1709 (void) nfsm_strtom(nd, dp->d_name, nlen);
1710 if (nd->nd_flag & ND_NFSV3) {
1711 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1714 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1715 *tl = txdr_unsigned(*cookiep);
1717 cpos += dp->d_reclen;
1718 dp = (struct dirent *)cpos;
1724 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1725 *tl++ = newnfs_false;
1730 FREE((caddr_t)rbuf, M_TEMP);
1731 FREE((caddr_t)cookies, M_TEMP);
1734 NFSEXITCODE2(0, nd);
1738 NFSEXITCODE2(error, nd);
1743 * Readdirplus for V3 and Readdir for V4.
1746 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1747 struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1752 char *cpos, *cend, *rbuf;
1755 struct nfsvattr nva, at, *nvap = &nva;
1756 struct mbuf *mb0, *mb1;
1757 struct nfsreferral *refp;
1758 int nlen, r, error = 0, getret = 1, usevget = 1;
1759 int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1760 caddr_t bpos0, bpos1;
1761 u_int64_t off, toff, verf;
1762 u_long *cookies = NULL, *cookiep;
1763 nfsattrbit_t attrbits, rderrbits, savbits;
1766 struct componentname cn;
1767 int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
1768 struct mount *mp, *new_mp;
1769 uint64_t mounted_on_fileno;
1771 if (nd->nd_repstat) {
1772 nfsrv_postopattr(nd, getret, &at);
1775 NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1776 off = fxdr_hyper(tl);
1779 verf = fxdr_hyper(tl);
1781 siz = fxdr_unsigned(int, *tl++);
1782 cnt = fxdr_unsigned(int, *tl);
1785 * Use the server's maximum data transfer size as the upper bound
1788 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1789 cnt = NFS_SRVMAXDATA(nd);
1792 * siz is a "hint" of how much directory information (name, fileid,
1793 * cookie) should be in the reply. At least one client "hints" 0,
1794 * so I set it to cnt for that case. I also round it up to the
1795 * next multiple of DIRBLKSIZ.
1799 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1801 if (nd->nd_flag & ND_NFSV4) {
1802 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1805 NFSSET_ATTRBIT(&savbits, &attrbits);
1806 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1807 NFSZERO_ATTRBIT(&rderrbits);
1808 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1810 NFSZERO_ATTRBIT(&attrbits);
1813 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1814 if (!nd->nd_repstat) {
1815 if (off && verf != at.na_filerev) {
1817 * va_filerev is not sufficient as a cookie verifier,
1818 * since it is not supposed to change when entries are
1819 * removed/added unless the offset cookies returned to
1820 * the client are no longer valid.
1823 if (nd->nd_flag & ND_NFSV4) {
1824 nd->nd_repstat = NFSERR_NOTSAME;
1826 nd->nd_repstat = NFSERR_BAD_COOKIE;
1829 } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1830 nd->nd_repstat = NFSERR_BAD_COOKIE;
1833 if (!nd->nd_repstat && vp->v_type != VDIR)
1834 nd->nd_repstat = NFSERR_NOTDIR;
1835 if (!nd->nd_repstat && cnt == 0)
1836 nd->nd_repstat = NFSERR_TOOSMALL;
1837 if (!nd->nd_repstat)
1838 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1839 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1840 NFSACCCHK_VPISLOCKED, NULL);
1841 if (nd->nd_repstat) {
1843 if (nd->nd_flag & ND_NFSV3)
1844 nfsrv_postopattr(nd, getret, &at);
1847 not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1849 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1853 free((caddr_t)cookies, M_TEMP);
1861 io.uio_offset = (off_t)off;
1863 io.uio_segflg = UIO_SYSSPACE;
1864 io.uio_rw = UIO_READ;
1866 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1868 off = (u_int64_t)io.uio_offset;
1870 siz -= io.uio_resid;
1872 getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1874 if (!cookies && !nd->nd_repstat)
1875 nd->nd_repstat = NFSERR_PERM;
1876 if (!nd->nd_repstat)
1877 nd->nd_repstat = getret;
1878 if (nd->nd_repstat) {
1881 free((caddr_t)cookies, M_TEMP);
1882 free((caddr_t)rbuf, M_TEMP);
1883 if (nd->nd_flag & ND_NFSV3)
1884 nfsrv_postopattr(nd, getret, &at);
1888 * If nothing read, return eof
1893 if (nd->nd_flag & ND_NFSV3)
1894 nfsrv_postopattr(nd, getret, &at);
1895 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1896 txdr_hyper(at.na_filerev, tl);
1898 *tl++ = newnfs_false;
1900 free((caddr_t)cookies, M_TEMP);
1901 free((caddr_t)rbuf, M_TEMP);
1906 * Check for degenerate cases of nothing useful read.
1907 * If so go try again
1911 dp = (struct dirent *)cpos;
1915 * For some reason FreeBSD's ufs_readdir() chooses to back the
1916 * directory offset up to a block boundary, so it is necessary to
1917 * skip over the records that precede the requested offset. This
1918 * requires the assumption that file offset cookies monotonically
1920 * Since the offset cookies don't monotonically increase for ZFS,
1921 * this is not done when ZFS is the file system.
1923 while (cpos < cend && ncookies > 0 &&
1924 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1925 (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
1926 ((nd->nd_flag & ND_NFSV4) &&
1927 ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1928 (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1929 cpos += dp->d_reclen;
1930 dp = (struct dirent *)cpos;
1934 if (cpos >= cend || ncookies == 0) {
1941 * Busy the file system so that the mount point won't go away
1942 * and, as such, VFS_VGET() can be used safely.
1946 NFSVOPUNLOCK(vp, 0);
1947 nd->nd_repstat = vfs_busy(mp, 0);
1949 if (nd->nd_repstat != 0) {
1951 free(cookies, M_TEMP);
1953 if (nd->nd_flag & ND_NFSV3)
1954 nfsrv_postopattr(nd, getret, &at);
1959 * Save this position, in case there is an error before one entry
1963 bpos0 = nd->nd_bpos;
1966 * Fill in the first part of the reply.
1967 * dirlen is the reply length in bytes and cannot exceed cnt.
1968 * (Include the two booleans at the end of the reply in dirlen now,
1969 * so we recognize when we have exceeded cnt.)
1971 if (nd->nd_flag & ND_NFSV3) {
1972 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1973 nfsrv_postopattr(nd, getret, &at);
1975 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1977 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1978 txdr_hyper(at.na_filerev, tl);
1981 * Save this position, in case there is an empty reply needed.
1984 bpos1 = nd->nd_bpos;
1986 /* Loop through the records and build reply */
1988 while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1989 nlen = dp->d_namlen;
1990 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1991 nlen <= NFS_MAXNAMLEN &&
1992 ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1993 (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1994 || (nlen == 1 && dp->d_name[0] != '.'))) {
1996 * Save the current position in the reply, in case
1997 * this entry exceeds cnt.
2000 bpos1 = nd->nd_bpos;
2003 * For readdir_and_lookup get the vnode using
2012 mounted_on_fileno = (uint64_t)dp->d_fileno;
2013 if ((nd->nd_flag & ND_NFSV3) ||
2014 NFSNONZERO_ATTRBIT(&savbits)) {
2015 if (nd->nd_flag & ND_NFSV4)
2016 refp = nfsv4root_getreferral(NULL,
2020 r = VFS_VGET(mp, dp->d_fileno,
2024 if (r == EOPNOTSUPP) {
2027 cn.cn_nameiop = LOOKUP;
2035 cn.cn_nameptr = dp->d_name;
2036 cn.cn_namelen = nlen;
2037 cn.cn_flags = ISLASTCN |
2038 NOFOLLOW | LOCKLEAF |
2041 dp->d_name[0] == '.' &&
2042 dp->d_name[1] == '.')
2045 if (NFSVOPLOCK(vp, LK_SHARED)
2047 nd->nd_repstat = EPERM;
2050 if ((vp->v_vflag & VV_ROOT) != 0
2051 && (cn.cn_flags & ISDOTDOT)
2057 r = VOP_LOOKUP(vp, &nvp,
2066 * For NFSv4, check to see if nvp is
2067 * a mount point and get the mount
2068 * point vnode, as required.
2071 nfsrv_enable_crossmntpt != 0 &&
2072 (nd->nd_flag & ND_NFSV4) != 0 &&
2073 nvp->v_type == VDIR &&
2074 nvp->v_mountedhere != NULL) {
2075 new_mp = nvp->v_mountedhere;
2076 r = vfs_busy(new_mp, 0);
2080 r = VFS_ROOT(new_mp,
2090 ((nd->nd_flag & ND_NFSV3) ||
2091 NFSNONZERO_ATTRBIT(&attrbits))) {
2092 r = nfsvno_getfh(nvp, &nfh, p);
2094 r = nfsvno_getattr(nvp, nvap,
2101 if (!NFSISSET_ATTRBIT(&attrbits,
2102 NFSATTRBIT_RDATTRERROR)) {
2105 if (needs_unbusy != 0)
2114 * Build the directory record xdr
2116 if (nd->nd_flag & ND_NFSV3) {
2117 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2118 *tl++ = newnfs_true;
2120 *tl = txdr_unsigned(dp->d_fileno);
2121 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2122 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2124 *tl = txdr_unsigned(*cookiep);
2125 nfsrv_postopattr(nd, 0, nvap);
2126 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
2127 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
2131 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2132 *tl++ = newnfs_true;
2134 *tl = txdr_unsigned(*cookiep);
2135 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2137 supports_nfsv4acls =
2138 nfs_supportsnfsv4acls(nvp);
2139 NFSVOPUNLOCK(nvp, 0);
2141 supports_nfsv4acls = 0;
2143 dirlen += nfsrv_putreferralattr(nd,
2146 if (nd->nd_repstat) {
2149 if (needs_unbusy != 0)
2154 dirlen += nfsvno_fillattr(nd, new_mp,
2155 nvp, nvap, &nfh, r, &rderrbits,
2156 nd->nd_cred, p, isdgram, 0,
2157 supports_nfsv4acls, at_root,
2160 dirlen += nfsvno_fillattr(nd, new_mp,
2161 nvp, nvap, &nfh, r, &attrbits,
2162 nd->nd_cred, p, isdgram, 0,
2163 supports_nfsv4acls, at_root,
2168 dirlen += (3 * NFSX_UNSIGNED);
2170 if (needs_unbusy != 0)
2175 cpos += dp->d_reclen;
2176 dp = (struct dirent *)cpos;
2184 * If dirlen > cnt, we must strip off the last entry. If that
2185 * results in an empty reply, report NFSERR_TOOSMALL.
2187 if (dirlen > cnt || nd->nd_repstat) {
2188 if (!nd->nd_repstat && entrycnt == 0)
2189 nd->nd_repstat = NFSERR_TOOSMALL;
2191 newnfs_trimtrailing(nd, mb0, bpos0);
2193 newnfs_trimtrailing(nd, mb1, bpos1);
2195 } else if (cpos < cend)
2197 if (!nd->nd_repstat) {
2198 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2199 *tl++ = newnfs_false;
2205 FREE((caddr_t)cookies, M_TEMP);
2206 FREE((caddr_t)rbuf, M_TEMP);
2209 NFSEXITCODE2(0, nd);
2213 NFSEXITCODE2(error, nd);
/*
 * nfsrv_sattr: decode the attributes a client wants to set from the request
 * in nd and store them in *nvap.  The wire layout is selected by the
 * protocol version bit in nd->nd_flag.  attrbitp, aclp and p are passed on
 * to nfsv4_sattr() for the NFSv4 case.  The local "error" is what is handed
 * to NFSEXITCODE2() at the end.
 */
2218 * Get the settable attributes out of the mbuf list.
2219 * (Return 0 or EBADRPC)
2222 nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2223 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2226 struct nfsv2_sattr *sp;
2227 struct timeval curtime;
2228 int error = 0, toclient = 0;
2230 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
/*
 * NFSv2: every field arrives in one fixed-size nfsv2_sattr structure.
 * A field is copied into *nvap only when it differs from newnfs_xdrneg1
 * (for the mode only the low 16 bits are compared, see below).
 */
2232 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2234 * Some old clients didn't fill in the high order 16bits.
2235 * --> check the low order 2 bytes for 0xffff
2237 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2238 nvap->na_mode = nfstov_mode(sp->sa_mode);
2239 if (sp->sa_uid != newnfs_xdrneg1)
2240 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2241 if (sp->sa_gid != newnfs_xdrneg1)
2242 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2243 if (sp->sa_size != newnfs_xdrneg1)
2244 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2245 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2247 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2249 nvap->na_atime.tv_sec =
2250 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2251 nvap->na_atime.tv_nsec = 0;
2254 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2255 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
/*
 * NFSv3: mode, uid, gid and size are each preceded by one boolean word;
 * the value that follows is dissected only when that word is newnfs_true.
 */
2258 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2259 if (*tl == newnfs_true) {
2260 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2261 nvap->na_mode = nfstov_mode(*tl);
2263 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2264 if (*tl == newnfs_true) {
2265 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2266 nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2268 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2269 if (*tl == newnfs_true) {
2270 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2271 nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2273 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2274 if (*tl == newnfs_true) {
/* The size is a 64-bit value, hence two XDR words. */
2275 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2276 nvap->na_size = fxdr_hyper(tl);
/*
 * atime: a one-word discriminator selects either a time supplied by the
 * client (two more words) or the server's current time.
 */
2278 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2279 switch (fxdr_unsigned(int, *tl)) {
2280 case NFSV3SATTRTIME_TOCLIENT:
2281 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2282 fxdr_nfsv3time(tl, &nvap->na_atime);
2285 case NFSV3SATTRTIME_TOSERVER:
/* curtime is a timeval, so microseconds are scaled to nanoseconds. */
2286 NFSGETTIME(&curtime);
2287 nvap->na_atime.tv_sec = curtime.tv_sec;
2288 nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2289 nvap->na_vaflags |= VA_UTIMES_NULL;
/*
 * mtime: same encoding as atime.  A client-supplied mtime clears
 * VA_UTIMES_NULL again.
 */
2292 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2293 switch (fxdr_unsigned(int, *tl)) {
2294 case NFSV3SATTRTIME_TOCLIENT:
2295 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2296 fxdr_nfsv3time(tl, &nvap->na_mtime);
2297 nvap->na_vaflags &= ~VA_UTIMES_NULL;
2299 case NFSV3SATTRTIME_TOSERVER:
2300 NFSGETTIME(&curtime);
2301 nvap->na_mtime.tv_sec = curtime.tv_sec;
2302 nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2304 nvap->na_vaflags |= VA_UTIMES_NULL;
/* NFSv4 uses its own attribute encoding, decoded by nfsv4_sattr(). */
2309 error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2312 NFSEXITCODE2(error, nd);
2317 * Handle the settable attributes for V4.
2318 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2321 nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2322 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2327 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2329 u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2332 struct timeval curtime;
2334 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2337 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2338 attrsize = fxdr_unsigned(int, *tl);
2341 * Loop around getting the settable attributes. If an unsupported
2342 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2345 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2346 bitpos = NFSATTRBIT_MAX;
2350 for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2351 if (attrsum > attrsize) {
2352 error = NFSERR_BADXDR;
2355 if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2357 case NFSATTRBIT_SIZE:
2358 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2359 nvap->na_size = fxdr_hyper(tl);
2360 attrsum += NFSX_HYPER;
2362 case NFSATTRBIT_ACL:
2363 error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2367 if (aceerr && !nd->nd_repstat)
2368 nd->nd_repstat = aceerr;
2371 case NFSATTRBIT_ARCHIVE:
2372 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2373 if (!nd->nd_repstat)
2374 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2375 attrsum += NFSX_UNSIGNED;
2377 case NFSATTRBIT_HIDDEN:
2378 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2379 if (!nd->nd_repstat)
2380 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2381 attrsum += NFSX_UNSIGNED;
2383 case NFSATTRBIT_MIMETYPE:
2384 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2385 i = fxdr_unsigned(int, *tl);
2386 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2389 if (!nd->nd_repstat)
2390 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2391 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2393 case NFSATTRBIT_MODE:
2394 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2395 nvap->na_mode = nfstov_mode(*tl);
2396 attrsum += NFSX_UNSIGNED;
2398 case NFSATTRBIT_OWNER:
2399 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2400 j = fxdr_unsigned(int, *tl);
2402 error = NFSERR_BADXDR;
2405 if (j > NFSV4_SMALLSTR)
2406 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2409 error = nfsrv_mtostr(nd, cp, j);
2411 if (j > NFSV4_SMALLSTR)
2412 free(cp, M_NFSSTRING);
2415 if (!nd->nd_repstat) {
2416 nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2417 if (!nd->nd_repstat)
2420 if (j > NFSV4_SMALLSTR)
2421 free(cp, M_NFSSTRING);
2422 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2424 case NFSATTRBIT_OWNERGROUP:
2425 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2426 j = fxdr_unsigned(int, *tl);
2428 error = NFSERR_BADXDR;
2431 if (j > NFSV4_SMALLSTR)
2432 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2435 error = nfsrv_mtostr(nd, cp, j);
2437 if (j > NFSV4_SMALLSTR)
2438 free(cp, M_NFSSTRING);
2441 if (!nd->nd_repstat) {
2442 nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2443 if (!nd->nd_repstat)
2446 if (j > NFSV4_SMALLSTR)
2447 free(cp, M_NFSSTRING);
2448 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2450 case NFSATTRBIT_SYSTEM:
2451 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2452 if (!nd->nd_repstat)
2453 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2454 attrsum += NFSX_UNSIGNED;
2456 case NFSATTRBIT_TIMEACCESSSET:
2457 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2458 attrsum += NFSX_UNSIGNED;
2459 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2460 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2461 fxdr_nfsv4time(tl, &nvap->na_atime);
2463 attrsum += NFSX_V4TIME;
2465 NFSGETTIME(&curtime);
2466 nvap->na_atime.tv_sec = curtime.tv_sec;
2467 nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2468 nvap->na_vaflags |= VA_UTIMES_NULL;
2471 case NFSATTRBIT_TIMEBACKUP:
2472 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2473 if (!nd->nd_repstat)
2474 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2475 attrsum += NFSX_V4TIME;
2477 case NFSATTRBIT_TIMECREATE:
2478 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2479 if (!nd->nd_repstat)
2480 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2481 attrsum += NFSX_V4TIME;
2483 case NFSATTRBIT_TIMEMODIFYSET:
2484 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2485 attrsum += NFSX_UNSIGNED;
2486 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2487 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2488 fxdr_nfsv4time(tl, &nvap->na_mtime);
2489 nvap->na_vaflags &= ~VA_UTIMES_NULL;
2490 attrsum += NFSX_V4TIME;
2492 NFSGETTIME(&curtime);
2493 nvap->na_mtime.tv_sec = curtime.tv_sec;
2494 nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2496 nvap->na_vaflags |= VA_UTIMES_NULL;
2500 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2502 * set bitpos so we drop out of the loop.
2504 bitpos = NFSATTRBIT_MAX;
2510 * some clients pad the attrlist, so we need to skip over the
2513 if (attrsum > attrsize) {
2514 error = NFSERR_BADXDR;
2516 attrsize = NFSM_RNDUP(attrsize);
2517 if (attrsum < attrsize)
2518 error = nfsm_advance(nd, attrsize - attrsum, -1);
2521 NFSEXITCODE2(error, nd);
/*
 * nfsd_excred: check the request described by nd against the export
 * information in exp and, when the mapping condition near the end holds,
 * overwrite the uid, gid and group list of nd->nd_cred with those of
 * credanon.  The outcome is tracked in the local "error", which is what is
 * handed to NFSEXITCODE2() at the end.
 */
2526 * Check/setup export credentials.
2529 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2530 struct ucred *credanon)
2535 * Check/setup credentials.
/* A request flagged ND_GSS has MNT_EXPORTANON removed from the export flags. */
2537 if (nd->nd_flag & ND_GSS)
2538 exp->nes_exflag &= ~MNT_EXPORTANON;
2541 * Check to see if the operation is allowed for this security flavor.
2542 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2543 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2544 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
/*
 * nfsvno_testexp() is described in this file as returning 0 if ok and 1
 * otherwise, so a non-zero result here means the flavor is not acceptable.
 * Requests whose nd_procnum is NFSV4OP_SECINFO or NFSPROC_FSINFO are exempt.
 */
2546 if (nfsvno_testexp(nd, exp) &&
2547 nd->nd_procnum != NFSV4OP_SECINFO &&
2548 nd->nd_procnum != NFSPROC_FSINFO) {
2549 if (nd->nd_flag & ND_NFSV4)
2550 error = NFSERR_WRONGSEC;
2552 error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2557 * Check to see if the file system is exported V4 only.
2559 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
2560 error = NFSERR_PROGNOTV4;
2565 * Now, map the user credentials.
2566 * (Note that ND_AUTHNONE will only be set for an NFSv3
2567 * Fsinfo RPC. If set for anything else, this code might need
/*
 * The mapping is applied only when NFSVNO_EXPORTED(exp) holds and at least
 * one of these is true: the request is not ND_GSS and arrived as uid 0,
 * NFSVNO_EXPORTANON(exp) holds, or ND_AUTHNONE is set.
 */
2570 if (NFSVNO_EXPORTED(exp) &&
2571 ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2572 NFSVNO_EXPORTANON(exp) ||
2573 (nd->nd_flag & ND_AUTHNONE))) {
2574 nd->nd_cred->cr_uid = credanon->cr_uid;
2575 nd->nd_cred->cr_gid = credanon->cr_gid;
2576 crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2577 credanon->cr_groups);
2581 NFSEXITCODE2(error, nd);
/*
 * nfsvno_checkexp: ask the file system, via VFS_CHECKEXP(), about the export
 * of mp to the address nam.  The export flags and the number of security
 * flavors are written into exp, a credential pointer into *credp, and the
 * flavor list is then copied into exp->nes_secflavors[].
 */
2589 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2590 struct ucred **credp)
2592 int i, error, *secflavors;
2594 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2595 &exp->nes_numsecflavor, &secflavors);
/*
 * When nfs_rootfhset is set, the export flags and flavor count are zeroed.
 * NOTE(review): presumably this is reached only when VFS_CHECKEXP() failed;
 * confirm against the surrounding condition.
 */
2597 if (nfs_rootfhset) {
2598 exp->nes_exflag = 0;
2599 exp->nes_numsecflavor = 0;
/*
 * NOTE(review): no check of nes_numsecflavor against the capacity of
 * nes_secflavors[] is visible before this copy; confirm that the count
 * returned by VFS_CHECKEXP() can never exceed the array size.
 */
2603 /* Copy the security flavors. */
2604 for (i = 0; i < exp->nes_numsecflavor; i++)
2605 exp->nes_secflavors[i] = secflavors[i];
/*
 * nfsvno_fhtovp: translate the file handle fhp on mount mp into a vnode
 * (*vpp) and, when a client address nam is supplied, also fetch the export
 * information for that address into exp and *credp.
 * The vnode is always requested with LK_EXCLUSIVE; if the caller asked for
 * LK_SHARED the lock is downgraded at the end.
 */
2612 * Get a vnode for a file handle and export stuff.
2615 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2616 int lktype, struct vnode **vpp, struct nfsexstuff *exp,
2617 struct ucred **credp)
2619 int i, error, *secflavors;
/* Start from an empty flavor list; it is filled in only by the export check. */
2622 exp->nes_numsecflavor = 0;
2623 if (VFS_NEEDSGIANT(mp))
2626 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, vpp);
2628 /* Make sure the server replies ESTALE to the client. */
/* The export lookup is skipped when nam is NULL or the handle lookup failed. */
2630 if (nam && !error) {
2631 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2632 &exp->nes_numsecflavor, &secflavors);
2634 if (nfs_rootfhset) {
2635 exp->nes_exflag = 0;
2636 exp->nes_numsecflavor = 0;
/*
 * NOTE(review): no check of nes_numsecflavor against the capacity of
 * nes_secflavors[] is visible before this copy; confirm the bound.
 */
2642 /* Copy the security flavors. */
2643 for (i = 0; i < exp->nes_numsecflavor; i++)
2644 exp->nes_secflavors[i] = secflavors[i];
2647 if (error == 0 && lktype == LK_SHARED)
2649 * It would be much better to pass lktype to VFS_FHTOVP(),
2650 * but this will have to do until VFS_FHTOVP() has a lock
2651 * type argument like VFS_VGET().
2653 NFSVOPLOCK(*vpp, LK_DOWNGRADE | LK_RETRY);
2660 * nfsd_fhtovp() - convert a fh to a vnode ptr
2661 * - look up fsid in mount list (if not found ret error)
2662 * - get vp and export rights by calling nfsvno_fhtovp()
2663 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2665 * - if mpp != NULL, return the mount point so that it can
2666 * be used for vn_finished_write() by the caller
2669 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
2670 struct vnode **vpp, struct nfsexstuff *exp,
2671 struct mount **mpp, int startwrite, struct thread *p)
2674 struct ucred *credanon;
2677 fhp = (fhandle_t *)nfp->nfsrvfh_data;
2679 * Check for the special case of the nfsv4root_fh.
2681 mp = vfs_busyfs(&fhp->fh_fsid);
2686 nd->nd_repstat = ESTALE;
2691 vn_start_write(NULL, mpp, V_WAIT);
2693 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
2698 * For NFSv4 without a pseudo root fs, unexported file handles
2699 * can be returned, so that Lookup works everywhere.
2701 if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2702 !(nd->nd_flag & ND_NFSV4)) {
2704 nd->nd_repstat = EACCES;
2708 * Personally, I've never seen any point in requiring a
2709 * reserved port#, since only in the rare case where the
2710 * clients are all boxes with secure system priviledges,
2711 * does it provide any enhanced security, but... some people
2712 * believe it to be useful and keep putting this code back in.
2713 * (There is also some "security checker" out there that
2714 * complains if the nfs server doesn't enforce this.)
2715 * However, note the following:
2716 * RFC3530 (NFSv4) specifies that a reserved port# not be
2718 * RFC2623 recommends that, if a reserved port# is checked for,
2719 * that there be a way to turn that off--> ifdef'd.
2721 #ifdef NFS_REQRSVPORT
2722 if (!nd->nd_repstat) {
2723 struct sockaddr_in *saddr;
2724 struct sockaddr_in6 *saddr6;
2726 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2727 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2728 if (!(nd->nd_flag & ND_NFSV4) &&
2729 ((saddr->sin_family == AF_INET &&
2730 ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2731 (saddr6->sin6_family == AF_INET6 &&
2732 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2734 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2737 #endif /* NFS_REQRSVPORT */
2740 * Check/setup credentials.
2742 if (!nd->nd_repstat) {
2743 nd->nd_saveduid = nd->nd_cred->cr_uid;
2744 nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2748 if (credanon != NULL)
2750 if (nd->nd_repstat) {
2752 vn_finished_write(mp);
2759 NFSEXITCODE2(0, nd);
/*
 * fp_getfvp: look up descriptor fd in the file descriptor table of the
 * process that owns thread p.  fpp and vpp are output parameters.
 */
2766 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2768 struct filedesc *fdp;
2772 fdp = p->td_proc->p_fd;
/*
 * Rejects a descriptor at or beyond fd_nfiles, or one whose table slot is
 * NULL.
 * NOTE(review): fd is a signed int and only its upper bound is tested here
 * before it indexes fd_ofiles[]; confirm that a negative fd cannot reach
 * this point.
 */
2773 if (fd >= fdp->fd_nfiles ||
2774 (fp = fdp->fd_ofiles[fd]) == NULL) {
/*
 * nfsrv_v4rootexport: argp points at a struct nfsex_args.  Its export
 * member is applied to nfsv4root_mnt with vfs_export(); when that succeeds
 * and the request is not a MNT_DELEXPORT, a non-NULL fspec is resolved to a
 * file handle that becomes nfs_rootfh.
 */
2786 * Called from nfssvc() to update the exports list. Just call
2787 * vfs_export(). This has to be done, since the v4 root fake fs isn't
2788 * in the mount list.
2791 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2793 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2795 struct nameidata nd;
2798 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2799 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
2801 else if (error == 0) {
2802 if (nfsexargp->fspec == NULL) {
2807 * If fspec != NULL, this is the v4root path.
/* fspec is a user-space pointer (UIO_USERSPACE); symbolic links are followed. */
2809 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2810 nfsexargp->fspec, p);
2811 if ((error = namei(&nd)) != 0)
/* Turn the vnode found by namei() into a file handle. */
2813 error = nfsvno_getfh(nd.ni_vp, &fh, p);
/* Publish the handle as the NFSv4 root file handle. */
2816 nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2817 NFSBCOPY((caddr_t)&fh,
2818 nfs_rootfh.nfsrvfh_data,
2819 sizeof (fhandle_t));
/*
 * nfsrv_getsocksndseq: fetch two send-side sequence numbers of the TCP
 * connection behind socket so: *maxp receives snd_max and *unap receives
 * snd_una from the TCP control block.
 */
2830 * Get the tcp socket sequence numbers we need.
2831 * (Maybe this should be moved to the tcp sources?)
2834 nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2840 inp = sotoinpcb(so);
2841 KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
/* A connection marked INP_TIMEWAIT or INP_DROPPED is handled separately. */
2843 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2848 tp = intotcpcb(inp);
/* So is a connection that is not in the ESTABLISHED state. */
2849 if (tp->t_state != TCPS_ESTABLISHED) {
2854 *maxp = tp->snd_max;
2855 *unap = tp->snd_una;
2864 * This function needs to test to see if the system is near its limit
2865 * for memory allocation via malloc() or mget() and return True iff
2866 * either of these resources is near its limit.
2867 * XXX (For now, this is just a stub.)
/* Test knob: the limit simulation below runs only while this is non-zero. */
2869 int nfsrv_testmalloclimit = 0;
2871 nfsrv_mallocmget_limit(void)
/* Both counters are function-static, so they persist across calls. */
2873 static int printmesg = 0;
2874 static int testval = 1;
/*
 * testval is post-incremented only when the knob is set (short-circuit &&),
 * so the outer condition holds whenever its value before the increment is a
 * multiple of 1000.  printmesg is bumped each time that happens, and the
 * message is printed when its value before the increment is a multiple of
 * 100, which includes the very first time.
 */
2876 if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) {
2877 if ((printmesg++ % 100) == 0)
2878 printf("nfsd: malloc/mget near limit\n");
2885 * BSD specific initialization of a mount point.
2890 static int inited = 0;
2895 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2896 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2897 nfsv4root_mnt.mnt_export = NULL;
2898 TAILQ_INIT(&nfsv4root_opt);
2899 TAILQ_INIT(&nfsv4root_newopt);
2900 nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2901 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2902 nfsv4root_mnt.mnt_nvnodelistsize = 0;
/*
 * nfsvno_getvp: find the mount for fhp->fh_fsid with vfs_busyfs() and ask
 * that file system, via VFS_FHTOVP() with LK_EXCLUSIVE, to translate
 * fhp->fh_fid into a vnode.  No export lookup is made here.
 */
2906 * Get a vnode for a file handle, without checking exports, etc.
2909 nfsvno_getvp(fhandle_t *fhp)
2915 mp = vfs_busyfs(&fhp->fh_fsid);
2918 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
/*
 * nfsvno_advlock: apply (F_SETLK) or release (F_UNLCK) a byte-range lock on
 * vp through VOP_ADVLOCK().  first is the starting offset; end selects the
 * length, with NFS64BITSSET treated as a special case.  ftype == F_UNLCK
 * selects the unlock path, any other value the F_SETLK path.
 */
2926 * Do a local VOP_ADVLOCK().
2929 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2930 u_int64_t end, struct thread *td)
/* Local locking is controlled by the nfsrv_dolocallocks setting. */
2936 if (nfsrv_dolocallocks == 0)
2939 /* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. */
2940 if ((vp->v_iflag & VI_DOOMED) != 0) {
/* Offsets are absolute: measured from the start of the file. */
2945 fl.l_whence = SEEK_SET;
2947 fl.l_start = (off_t)first;
2948 if (end == NFS64BITSSET) {
2952 fl.l_len = (off_t)tlen;
2955 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2956 * values for all calls, so that all locks will be held by the
2957 * nfsd server. (The nfsd server handles conflicts between the
2959 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2960 * bytes, so it can't be put in l_sysid.
/* The sysid is obtained from the NLM code on first use and then reused. */
2962 if (nfsv4_sysid == 0)
2963 nfsv4_sysid = nlm_acquire_next_sysid();
2964 fl.l_pid = (pid_t)0;
2965 fl.l_sysid = (int)nfsv4_sysid;
/*
 * The vnode lock is released around the VOP_ADVLOCK() call and re-acquired
 * exclusively afterwards.
 */
2967 NFSVOPUNLOCK(vp, 0);
2968 if (ftype == F_UNLCK)
2969 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
2970 (F_POSIX | F_REMOTE));
2972 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2973 (F_POSIX | F_REMOTE));
2974 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
/*
 * nfsvno_v4rootexport: look up the export of the NFSv4 root mount
 * (nfsv4root_mnt) for the client address nd->nd_nam and record, as ND_EX*
 * bits in nd->nd_flag, which security flavors that export lists.
 */
2982 * Check the nfsv4 root exports.
2985 nfsvno_v4rootexport(struct nfsrv_descript *nd)
2987 struct ucred *credanon;
2988 int exflags, error = 0, numsecflavor, *secflavors, i;
2990 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2991 &credanon, &numsecflavor, &secflavors);
2993 error = NFSERR_PROGUNAVAIL;
2996 if (credanon != NULL)
/*
 * Flavor to flag mapping:
 *   AUTH_SYS          -> ND_EXAUTHSYS
 *   RPCSEC_GSS_KRB5   -> ND_EXGSS
 *   RPCSEC_GSS_KRB5I  -> ND_EXGSSINTEGRITY
 *   RPCSEC_GSS_KRB5P  -> ND_EXGSSPRIVACY
 * Any other flavor value sets no flag.
 */
2998 for (i = 0; i < numsecflavor; i++) {
2999 if (secflavors[i] == AUTH_SYS)
3000 nd->nd_flag |= ND_EXAUTHSYS;
3001 else if (secflavors[i] == RPCSEC_GSS_KRB5)
3002 nd->nd_flag |= ND_EXGSS;
3003 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
3004 nd->nd_flag |= ND_EXGSSINTEGRITY;
3005 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
3006 nd->nd_flag |= ND_EXGSSPRIVACY;
3015 * Nfs server pseudo system call for the nfsd's
/*
 * nfssvc_nfsd: dispatch on uap->flag.
 *   NFSSVC_NFSDADDSOCK - copy in a nfsd_addsock_args, look up its descriptor
 *                        and hand the file to nfsrvd_addsock().
 *   NFSSVC_NFSDNFSD    - copy in a nfsd_nfsd_args and call nfsrvd_nfsd().
 *   anything else      - forwarded to nfssvc_srvcall() with the calling
 *                        thread's credential.
 */
3021 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
3024 struct nfsd_addsock_args sockarg;
3025 struct nfsd_nfsd_args nfsdarg;
3028 if (uap->flag & NFSSVC_NFSDADDSOCK) {
/* uap->argp is a user-space pointer; its contents are copied in first. */
3029 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
3033 * Since we don't know what rights might be required,
3034 * pretend that we need them all. It is better to be too
3035 * careful than too reckless.
3037 if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
/* The descriptor must refer to a socket. */
3040 if (fp->f_type != DTYPE_SOCKET) {
3045 error = nfsrvd_addsock(fp);
3047 } else if (uap->flag & NFSSVC_NFSDNFSD) {
/* A NULL argument pointer is treated as a special case before the copyin. */
3048 if (uap->argp == NULL) {
3052 error = copyin(uap->argp, (caddr_t)&nfsdarg,
3056 error = nfsrvd_nfsd(td, &nfsdarg);
3058 error = nfssvc_srvcall(td, uap, td->td_ucred);
/*
 * nfssvc_srvcall: handle the remaining nfssvc() requests, selected by the
 * bits in uap->flag.  Each branch first copies its argument structure in
 * from the user-space pointer uap->argp (where it takes one) and then calls
 * the matching server routine.
 */
3067 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
3069 struct nfsex_args export;
3070 struct file *fp = NULL;
3072 struct nfsd_clid adminrevoke;
3073 struct nfsd_dumplist dumplist;
3074 struct nfsd_dumpclients *dumpclients;
3075 struct nfsd_dumplocklist dumplocklist;
3076 struct nfsd_dumplocks *dumplocks;
3077 struct nameidata nd;
/* Set the public file handle: clear the old one, then copy in a fhandle_t. */
3082 if (uap->flag & NFSSVC_PUBLICFH) {
3083 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
3084 sizeof (fhandle_t));
3085 error = copyin(uap->argp,
3086 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
/* Update the NFSv4 root export from a copied-in nfsex_args. */
3089 } else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
3090 error = copyin(uap->argp,(caddr_t)&export,
3091 sizeof (struct nfsex_args));
3093 error = nfsrv_v4rootexport(&export, cred, p);
3094 } else if (uap->flag & NFSSVC_NOPUBLICFH) {
/*
 * Stable-restart file: the argument is a descriptor number.  The file is
 * looked up with fp_getfvp() and tested for having both FREAD and FWRITE
 * set; newnfs_numnfsd != 0 is tested as well.  The file pointer is then
 * stored in nfsrv_stablefirst and nfsrv_setupstable() is called.
 */
3097 } else if (uap->flag & NFSSVC_STABLERESTART) {
3098 error = copyin(uap->argp, (caddr_t)&stablefd,
3101 error = fp_getfvp(p, stablefd, &fp, &vp);
3102 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
3104 if (!error && newnfs_numnfsd != 0)
3107 nfsrv_stablefirst.nsf_fp = fp;
3108 nfsrv_setupstable(p);
/* Administrative revoke of the client named by a copied-in nfsd_clid. */
3110 } else if (uap->flag & NFSSVC_ADMINREVOKE) {
3111 error = copyin(uap->argp, (caddr_t)&adminrevoke,
3112 sizeof (struct nfsd_clid));
3114 error = nfsrv_adminrevoke(&adminrevoke, p);
/*
 * Dump the client list.  ndl_size is range-checked against
 * 1..NFSRV_MAXDUMPLIST; a temporary array of that many entries is filled by
 * nfsrv_dumpclients(), copied out to the user buffer ndl_list and freed.
 */
3115 } else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3116 error = copyin(uap->argp, (caddr_t)&dumplist,
3117 sizeof (struct nfsd_dumplist));
3118 if (!error && (dumplist.ndl_size < 1 ||
3119 dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3122 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3123 dumpclients = (struct nfsd_dumpclients *)malloc(len,
3125 nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3126 error = copyout(dumpclients,
3127 CAST_USER_ADDR_T(dumplist.ndl_list), len);
3128 free((caddr_t)dumpclients, M_TEMP);
/*
 * Dump the locks held on one file.  Same pattern as above, with the file
 * named by ndllck_fname resolved through nfsrv_lookupfilename() first.
 */
3130 } else if (uap->flag & NFSSVC_DUMPLOCKS) {
3131 error = copyin(uap->argp, (caddr_t)&dumplocklist,
3132 sizeof (struct nfsd_dumplocklist));
3133 if (!error && (dumplocklist.ndllck_size < 1 ||
3134 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3137 error = nfsrv_lookupfilename(&nd,
3138 dumplocklist.ndllck_fname, p);
3140 len = sizeof (struct nfsd_dumplocks) *
3141 dumplocklist.ndllck_size;
3142 dumplocks = (struct nfsd_dumplocks *)malloc(len,
3144 nfsrv_dumplocks(nd.ni_vp, dumplocks,
3145 dumplocklist.ndllck_size, p);
3147 error = copyout(dumplocks,
3148 CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3149 free((caddr_t)dumplocks, M_TEMP);
/* Record the pid, command name, start time and proc pointer of procp. */
3151 } else if (uap->flag & NFSSVC_BACKUPSTABLE) {
3154 nfsd_master_pid = procp->p_pid;
3155 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
3156 nfsd_master_start = procp->p_stats->p_start;
3157 nfsd_master_proc = procp;
3167 * Returns 0 if ok, 1 otherwise.
/*
 * nfsvno_testexp: compare the security used by a request against the
 * security flavors listed for an export.
 *
 *   nd  - request descriptor; only nd->nd_flag is read here.
 *   exp - export information; nes_numsecflavor and nes_secflavors[] are read.
 *
 * NOTE(review): the statements executed when a test matches, and the final
 * return, are on lines not shown in this excerpt.
 */
3170 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3175 * This seems odd, but allow the case where the security flavor
3176 * list is empty. This happens when NFSv4 is traversing non-exported
3177 * file systems. Exported file systems should always have a non-empty
3178 * security flavor list.
3180 if (exp->nes_numsecflavor == 0)
/*
 * Walk the flavor list. Each entry is paired with the request flag it
 * requires: KRB5P with ND_GSSPRIVACY, KRB5I with ND_GSSINTEGRITY, KRB5 with
 * ND_GSS, and AUTH_SYS with ND_GSS being clear.
 */
3183 for (i = 0; i < exp->nes_numsecflavor; i++) {
3185 * The tests for privacy and integrity must be first,
3186 * since ND_GSS is set for everything but AUTH_SYS.
3188 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3189 (nd->nd_flag & ND_GSSPRIVACY))
3191 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3192 (nd->nd_flag & ND_GSSINTEGRITY))
3194 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3195 (nd->nd_flag & ND_GSS))
3197 if (exp->nes_secflavors[i] == AUTH_SYS &&
3198 (nd->nd_flag & ND_GSS) == 0)
3205 * Calculate a hash value for the fid in a file handle.
/*
 * nfsrv_hashfh: hash the fid portion of a file handle.
 *
 *   fhp - file handle; only its fh_fid member is hashed.
 *
 * The hash covers sizeof(struct fid) bytes and is computed by hash32_buf()
 * with an initial value of 0. The return statement is not shown in this
 * excerpt.
 */
3208 nfsrv_hashfh(fhandle_t *fhp)
3212 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
3217 * Signal the userland master nfsd to backup the stable restart file.
/*
 * nfsrv_backupstable: send SIGUSR2 to the recorded master nfsd process.
 *
 * Nothing is done unless nfsd_master_proc is set. The process is looked up
 * again by nfsd_master_pid and is signalled only if the proc pointer, the
 * start time (seconds and microseconds) and the command name all still
 * match the values recorded in the nfsd_master_* globals.
 *
 * NOTE(review): pfind(9) is documented to return the process locked; the
 * matching unlock, and the condition under which nfsd_master_proc is
 * cleared below, are on lines not shown in this excerpt - confirm.
 */
3220 nfsrv_backupstable(void)
3224 if (nfsd_master_proc != NULL) {
3225 procp = pfind(nfsd_master_pid);
3226 /* Try to make sure it is the correct process. */
/*
 * The pointer comparison comes first: nfsd_master_proc is known to be
 * non-NULL here, so a NULL result from pfind() fails this test and the
 * dereferences of procp that follow are never evaluated.
 */
3227 if (procp == nfsd_master_proc &&
3228 procp->p_stats->p_start.tv_sec ==
3229 nfsd_master_start.tv_sec &&
3230 procp->p_stats->p_start.tv_usec ==
3231 nfsd_master_start.tv_usec &&
3232 strcmp(procp->p_comm, nfsd_master_comm) == 0)
3233 psignal(procp, SIGUSR2);
3235 nfsd_master_proc = NULL;
/*
 * Function pointer hook defined outside this file. nfsd_modevent() points
 * it at nfssvc_nfsd during initialization and resets it to NULL during
 * teardown.
 */
3242 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3245 * Called once to initialize data structures...
/*
 * nfsd_modevent: module event handler for the nfsd module.
 *
 *   mod, type, data - module event handler arguments. None of them is
 *                     referenced in the lines visible here.
 *
 * Two groups of statements are visible: one that initializes locks and
 * installs function pointer hooks, and one that removes the hooks, discards
 * server state and destroys the same locks.
 *
 * NOTE(review): the lines that select between the two groups, the preprocessor
 * lines that close the conditional sections, and the returns are not shown
 * in this excerpt.
 */
3248 nfsd_modevent(module_t mod, int type, void *data)
/*
 * Static, so the value persists across calls. NOTE(review): presumably
 * guards against repeated initialization; the lines that read or set it
 * are not shown.
 */
3251 static int loaded = 0;
/* Initialization: create the mutexes and the export lock used by the server. */
3258 mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3259 mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3260 mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3262 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
/* Delegation callbacks are installed only when VV_DISABLEDELEG is defined. */
3269 #ifdef VV_DISABLEDELEG
3270 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3271 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
/* Install the server timer and nfssvc entry point hooks. */
3273 nfsd_call_servertimer = nfsrv_servertimer;
3274 nfsd_call_nfsd = nfssvc_nfsd;
/*
 * Teardown: newnfs_numnfsd is tested first. NOTE(review): presumably the
 * teardown is refused while it is non-zero; the body of this test is not
 * shown.
 */
3279 if (newnfs_numnfsd != 0) {
/* Remove every hook that was installed during initialization. */
3284 #ifdef VV_DISABLEDELEG
3285 vn_deleg_ops.vndeleg_recall = NULL;
3286 vn_deleg_ops.vndeleg_disable = NULL;
3288 nfsd_call_servertimer = NULL;
3289 nfsd_call_nfsd = NULL;
3291 /* Clean out all NFSv4 state. */
3292 nfsrv_throwawayallstate(curthread);
3294 /* Clean the NFS server reply cache */
3295 nfsrvd_cleancache();
3297 /* Free up the krpc server pool. */
3298 if (nfsrvd_pool != NULL)
3299 svcpool_destroy(nfsrvd_pool);
3301 /* and get rid of the locks */
3302 mtx_destroy(&nfs_cache_mutex);
3303 mtx_destroy(&nfs_v4root_mutex);
3304 mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3305 lockdestroy(&nfsv4root_mnt.mnt_explock);
/*
 * Module registration for nfsd.
 *
 * nfsd_mod is the moduledata_t handed to DECLARE_MODULE; its initializer
 * fields are on lines not shown in this excerpt. The module is declared in
 * the SI_SUB_VFS subsystem with order SI_ORDER_ANY.
 */
3317 static moduledata_t nfsd_mod = {
3322 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
3324 /* So that loader and kldload(2) can find us, wherever we are.. */
3325 MODULE_VERSION(nfsd, 1);
/*
 * Modules that nfsd depends on. All three version arguments are 1 for
 * every dependency listed.
 */
3326 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
3327 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
3328 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3329 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
3330 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);