2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
47 #include <sys/clock.h>
49 #include <sys/limits.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
64 #include <vm/vm_extern.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
/* Register "nfscl" as a kernel feature with the description string below. */
77 FEATURE(nfscl, "NFSv4 client");
/* Objects used by this file whose definitions are not in this file. */
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats newnfsstats;
82 extern int nfsrv_useacl;
83 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
84 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
85 extern struct mtx ncl_iod_mutex;
/* Allocation types used for request headers and for struct nfsmount. */
87 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
88 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
90 SYSCTL_DECL(_vfs_nfs);
/*
 * nfs_ip_paranoia defaults to 1; nfs_mount() forces NFSMNT_NOCONN on for
 * new mounts when it has been set to 0.
 */
91 static int nfs_ip_paranoia = 1;
92 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
93 &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): the two SYSCTL_INT() entries below pass the delay constant
 * (NFS_TPRINTF_INITIAL_DELAY / NFS_TPRINTF_DELAY) in the argument position
 * where the entry above passes OID_AUTO -- confirm that is intentional.
 */
94 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
95 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
96 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
97 /* how long between console messages "nfs server foo not responding" */
98 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
100 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/* Forward declarations of helpers that are private to this file. */
102 static int nfs_mountroot(struct mount *);
103 static void nfs_sec_name(char *, int *);
104 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
105 struct nfs_args *argp, const char *, struct ucred *,
107 static int mountnfs(struct nfs_args *, struct mount *,
108 struct sockaddr *, char *, u_char *, int, u_char *, int,
109 u_char *, int, struct vnode **, struct ucred *,
110 struct thread *, int, int);
111 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
112 struct sockaddr_storage *, int *, off_t *,
/* VFS entry points, declared through the vfs_*_t function typedefs. */
114 static vfs_mount_t nfs_mount;
115 static vfs_cmount_t nfs_cmount;
116 static vfs_unmount_t nfs_unmount;
117 static vfs_root_t nfs_root;
118 static vfs_statfs_t nfs_statfs;
119 static vfs_sync_t nfs_sync;
120 static vfs_sysctl_t nfs_sysctl;
123 * nfs vfs operations.
/*
 * Operation table for this filesystem: each slot is bound to the handler
 * declared above, with ncl_init/ncl_uninit as the init/uninit hooks.
 */
125 static struct vfsops nfs_vfsops = {
126 .vfs_init = ncl_init,
127 .vfs_mount = nfs_mount,
128 .vfs_cmount = nfs_cmount,
129 .vfs_root = nfs_root,
130 .vfs_statfs = nfs_statfs,
131 .vfs_sync = nfs_sync,
132 .vfs_uninit = ncl_uninit,
133 .vfs_unmount = nfs_unmount,
134 .vfs_sysctl = nfs_sysctl,
/* Register the table under the name "nfs" with the network and SBDRY flags. */
136 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
138 /* So that loader and kldload(2) can find us, wherever we are.. */
139 MODULE_VERSION(nfs, 1);
/* Modules this one declares a dependency on (version range 1..1 each). */
140 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
141 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
142 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
143 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
146 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
147 * can be shared by both NFS clients. It is declared here so that it
148 * will be defined for kernels built without NFS_ROOT, although it
149 * isn't used in that case.
/*
 * Fallback definitions, compiled only when neither NFS_ROOT nor NFSCLIENT
 * is defined. NOTE(review): the end of this conditional is not visible in
 * this listing -- confirm which of the following lines it covers.
 */
151 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
152 struct nfs_diskless nfs_diskless = { { { 0 } } };
153 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
154 int nfs_diskless_valid = 0;
/* Read-only sysctls exposing the diskless-boot state. */
157 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
158 &nfs_diskless_valid, 0,
159 "Has the diskless struct been filled correctly");
161 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
162 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
164 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
165 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
166 "%Ssockaddr_in", "Diskless root nfs address");
/* Prototypes for the diskless-root helpers used further down. */
169 void newnfsargs_ntoh(struct nfs_args *);
170 static int nfs_mountdiskless(char *,
171 struct sockaddr_in *, struct nfs_args *,
172 struct thread *, struct vnode **, struct mount *);
173 static void nfs_convert_diskless(void);
174 static void nfs_convert_oargs(struct nfs_args *args,
175 struct onfs_args *oargs);
/*
 * Clamp the mount's read, write and readdir transfer sizes to the limit
 * chosen for its NFS version and socket type, then compute the I/O buffer
 * size and store it in the mount's mnt_stat.f_iosize.
 * nmp: the NFS mount whose nm_rsize/nm_wsize/nm_readdirsize are adjusted
 * in place. nfs_statfs() assigns this function's result to f_iosize; the
 * return statement itself is not visible in this listing.
 */
178 newnfs_iosize(struct nfsmount *nmp)
182 /* First, set the upper limit for iosize */
183 if (nmp->nm_flag & NFSMNT_NFSV4) {
184 maxio = NFS_MAXBSIZE;
185 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
186 if (nmp->nm_sotype == SOCK_DGRAM)
187 maxio = NFS_MAXDGRAMDATA;
189 maxio = NFS_MAXBSIZE;
191 maxio = NFS_V2MAXDATA;
/* A size that is 0 or above the limit is replaced by the limit. */
193 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
194 nmp->nm_rsize = maxio;
/* rsize is additionally capped at MAXBSIZE. */
195 if (nmp->nm_rsize > MAXBSIZE)
196 nmp->nm_rsize = MAXBSIZE;
197 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
198 nmp->nm_readdirsize = maxio;
/* readdirsize may not exceed the (already clamped) rsize. */
199 if (nmp->nm_readdirsize > nmp->nm_rsize)
200 nmp->nm_readdirsize = nmp->nm_rsize;
201 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
202 nmp->nm_wsize = maxio;
203 if (nmp->nm_wsize > MAXBSIZE)
204 nmp->nm_wsize = MAXBSIZE;
207 * Calculate the size used for io buffers. Use the larger
208 * of the two sizes to minimise nfs requests but make sure
209 * that it is at least one VM page to avoid wasting buffer
210 * space. It must also be at least NFS_DIRBLKSIZ, since
211 * that is the buffer size used for directories.
213 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
214 iosize = imax(iosize, PAGE_SIZE);
215 iosize = imax(iosize, NFS_DIRBLKSIZ);
216 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
/*
 * Fill a current-format nfs_args from an old-format onfs_args.
 * args:  destination; its version is set to NFS_ARGSVERSION.
 * oargs: source; each listed member is assigned across unchanged, so addr,
 *        fh and hostname refer to the same storage as in oargs.
 * Members of nfs_args that are not assigned here are left untouched.
 */
221 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224 args->version = NFS_ARGSVERSION;
225 args->addr = oargs->addr;
226 args->addrlen = oargs->addrlen;
227 args->sotype = oargs->sotype;
228 args->proto = oargs->proto;
229 args->fh = oargs->fh;
230 args->fhsize = oargs->fhsize;
231 args->flags = oargs->flags;
232 args->wsize = oargs->wsize;
233 args->rsize = oargs->rsize;
234 args->readdirsize = oargs->readdirsize;
235 args->timeo = oargs->timeo;
236 args->retrans = oargs->retrans;
237 args->readahead = oargs->readahead;
238 args->hostname = oargs->hostname;
/*
 * Populate the global nfsv3_diskless from the global nfs_diskless and then
 * set nfs_diskless_valid to 3. nfs_mountroot() calls this when
 * nfs_diskless_valid is 1.
 */
242 nfs_convert_diskless(void)
/* Interface configuration and gateway address are copied verbatim. */
245 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
246 sizeof(struct ifaliasreq));
247 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
248 sizeof(struct sockaddr_in));
249 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
/*
 * The root file handle size depends on the NFSMNT_NFSV3 flag: NFSX_MYFH
 * when it is set, NFSX_V2FH in the other assignment below (the line
 * separating the two branches is not visible in this listing).
 */
250 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
251 nfsv3_diskless.root_fhsize = NFSX_MYFH;
252 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
254 nfsv3_diskless.root_fhsize = NFSX_V2FH;
255 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
/* Server address, root path name, boot time and local host name. */
257 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
258 sizeof(struct sockaddr_in));
259 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
260 nfsv3_diskless.root_time = nfs_diskless.root_time;
261 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
263 nfs_diskless_valid = 3;
/*
 * VFS statfs handler: query the server for filesystem statistics and fill
 * in *sbp.
 * mp:  the NFS mount being queried.
 * sbp: result buffer; may be &mp->mnt_stat itself, in which case the mount
 *      name fields are not re-copied.
 * The error value is accumulated in "error"; the return statement is not
 * visible in this listing.
 */
270 nfs_statfs(struct mount *mp, struct statfs *sbp)
274 struct nfsmount *nmp = VFSTONFS(mp);
275 struct nfsvattr nfsva;
278 int error = 0, attrflag, gotfsinfo = 0, ret;
/* Busy the mount without waiting, then get the node for the mount's root file handle. */
283 error = vfs_busy(mp, MBF_NOWAIT);
286 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
/*
 * For an NFSv3 mount that has not yet obtained fsinfo, issue the fsinfo
 * RPC. nm_mtx is dropped before the RPC is made.
 */
292 mtx_lock(&nmp->nm_mtx);
293 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
294 mtx_unlock(&nmp->nm_mtx);
295 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
300 mtx_unlock(&nmp->nm_mtx);
302 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
/* Fetch attributes for the root file handle directly (no vnode needed). */
305 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
306 td->td_ucred, td, &nfsva, NULL);
309 * Just set default values to get things going.
311 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
312 nfsva.na_vattr.va_type = VDIR;
313 nfsva.na_vattr.va_mode = 0777;
314 nfsva.na_vattr.va_nlink = 100;
315 nfsva.na_vattr.va_uid = (uid_t)0;
316 nfsva.na_vattr.va_gid = (gid_t)0;
317 nfsva.na_vattr.va_fileid = 2;
318 nfsva.na_vattr.va_gen = 1;
319 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
320 nfsva.na_vattr.va_size = 512 * 1024;
/* Load the attributes into the attribute cache; the result is ignored. */
323 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
/* Publish the results into the mount and *sbp while holding nm_mtx. */
325 mtx_lock(&nmp->nm_mtx);
326 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
327 nfscl_loadfsinfo(nmp, &fs);
328 nfscl_loadsbinfo(nmp, &sb, sbp);
329 sbp->f_iosize = newnfs_iosize(nmp);
330 mtx_unlock(&nmp->nm_mtx);
/* A caller-supplied buffer also needs the mount point and source names. */
331 if (sbp != &mp->mnt_stat) {
332 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
333 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
335 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
/* On the other branch, an NFSv4 vnode has its error passed through nfscl_maperr(). */
336 } else if (NFS_ISV4(vp)) {
337 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
345 * nfs version 3 fsinfo rpc call
/*
 * Issue the fsinfo RPC for vp using cred, feed any returned attributes to
 * the attribute cache, and load the fsinfo reply into nmp under nm_mtx.
 * NOTE(review): the conditions guarding the two steps after the RPC, the
 * trailing parameters and the return statement are not visible in this
 * listing.
 */
348 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
352 struct nfsvattr nfsva;
355 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
358 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
360 mtx_lock(&nmp->nm_mtx);
361 nfscl_loadfsinfo(nmp, &fs);
362 mtx_unlock(&nmp->nm_mtx);
368 * Mount a remote root fs via NFS. This depends on the info in the
369 * nfs_diskless structure that has been filled in properly by some primary
371 * It goes something like this:
372 * - do enough of "ifconfig" by calling ifioctl() so that the system
373 * can talk to the server
374 * - If nfs_diskless.mygateway is filled in, use that address as
376 * - build the rootfs mount point and call mountnfs() to do the rest.
378 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
379 * structure, as well as other global NFS client variables here, as
380 * nfs_mountroot() will be called once in the boot before any other NFS
381 * client activity occurs.
/*
 * Mount the root filesystem over NFS using the global nfsv3_diskless.
 * mp: the mount structure for the root filesystem being created.
 * Steps visible here: obtain/convert the diskless information, configure
 * the boot interface (panicking on failure), optionally set its MTU and a
 * default route, build the "a.b.c.d:path" name, call nfs_mountdiskless(),
 * then set the host name and the time of day from the diskless data.
 * The return statements are not visible in this listing.
 */
384 nfs_mountroot(struct mount *mp)
386 struct thread *td = curthread;
387 struct nfsv3_diskless *nd = &nfsv3_diskless;
/* Fill in the diskless structure by whichever method the kernel was built with. */
396 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
397 bootpc_init(); /* use bootp to get nfs_diskless filled in */
398 #elif defined(NFS_ROOT)
399 nfs_setup_diskless();
/* A value of 1 means only the old-format structure is filled in: convert it. */
402 if (nfs_diskless_valid == 0)
404 if (nfs_diskless_valid == 1)
405 nfs_convert_diskless();
408 * XXX splnet, so networks will receive...
413 * Do enough of ifconfig(8) so that the critical net interface can
414 * talk to the server.
416 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
419 panic("nfs_mountroot: socreate(%04x): %d",
420 nd->myif.ifra_addr.sa_family, error);
422 #if 0 /* XXX Bad idea */
424 * We might not have been told the right interface, so we pass
425 * over the first ten interfaces of the same kind, until we get
426 * one of them configured.
429 for (i = strlen(nd->myif.ifra_name) - 1;
430 nd->myif.ifra_name[i] >= '0' &&
431 nd->myif.ifra_name[i] <= '9';
432 nd->myif.ifra_name[i] ++) {
433 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
440 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
/* Optional MTU override taken from the "boot.netif.mtu" kernel environment variable. */
441 if ((cp = getenv("boot.netif.mtu")) != NULL) {
442 ir.ifr_mtu = strtol(cp, NULL, 10);
443 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
445 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
/* Failure to set the MTU is only reported; the message now ends its line. */
447 printf("nfs_mountroot: SIOCSIFMTU: %d\n", error);
452 * If the gateway field is filled in, set it as the default route.
453 * Note that pxeboot will set a default route of 0 if the route
454 * is not set by the DHCP server. Check also for a value of 0
455 * to avoid panicking inappropriately in that situation.
457 if (nd->mygateway.sin_len != 0 &&
458 nd->mygateway.sin_addr.s_addr != 0) {
459 struct sockaddr_in mask, sin;
461 bzero((caddr_t)&mask, sizeof(mask));
463 sin.sin_family = AF_INET;
464 sin.sin_len = sizeof(sin);
465 /* XXX MRT use table 0 for this sort of thing */
466 CURVNET_SET(TD_TO_VNET(td));
467 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
468 (struct sockaddr *)&nd->mygateway,
469 (struct sockaddr *)&mask,
470 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
473 panic("nfs_mountroot: RTM_ADD: %d", error);
477 * Create the rootfs mount point.
479 nd->root_args.fh = nd->root_fh;
480 nd->root_args.fhsize = nd->root_fhsize;
/* Format the server address as a dotted quad followed by ":" and the root path. */
481 l = ntohl(nd->root_saddr.sin_addr.s_addr);
482 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
483 (l >> 24) & 0xff, (l >> 16) & 0xff,
484 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
485 printf("NFS ROOT: %s\n", buf);
486 nd->root_args.hostname = buf;
487 if ((error = nfs_mountdiskless(buf,
488 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493 * This is not really an nfs issue, but it is much easier to
494 * set hostname here and then let the "/etc/rc.xxx" files
495 * mount the right /var based upon its preset value.
/* prison0's host name is updated under its mutex. */
497 mtx_lock(&prison0.pr_mtx);
498 strlcpy(prison0.pr_hostname, nd->my_hostnam,
499 sizeof(prison0.pr_hostname));
500 mtx_unlock(&prison0.pr_mtx);
/* Initialise the time of day from the diskless structure's root_time. */
501 inittodr(ntohl(nd->root_time));
506 * Internal version of mount system call for diskless setup.
/*
 * path: "server:directory" string; the text after the first ':' is passed
 *       to mountnfs() as the directory path.
 * sin:  server address; a duplicate made with sodupsockaddr() is passed on.
 * args: mount arguments, passed through to mountnfs().
 * td:   calling thread; its credentials are used for the mount.
 * vpp:  passed through to mountnfs().
 * mp:   the mount being set up.
 * A mountnfs() failure is logged with the path and error number.
 */
509 nfs_mountdiskless(char *path,
510 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
511 struct vnode **vpp, struct mount *mp)
513 struct sockaddr *nam;
518 * Find the directory path in "path", which also has the server's
519 * name/ip address in it.
521 dirpath = strchr(path, ':');
/* Step past the ':' itself before measuring the directory part. */
523 dirlen = strlen(++dirpath);
526 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
/* No Kerberos names are supplied; name cache timeouts use the defaults. */
527 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
528 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
529 NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
530 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
/*
 * Translate a security flavor name into mount flag bits.
 * sec:    flavor name; "krb5", "krb5i" and "krb5p" are recognised.
 * flagsp: flag word that the matching NFSMNT_KERB / NFSMNT_INTEGRITY /
 *         NFSMNT_PRIVACY bits are OR-ed into. No bits are set for any
 *         other name in the lines visible here.
 */
537 nfs_sec_name(char *sec, int *flagsp)
539 if (!strcmp(sec, "krb5"))
540 *flagsp |= NFSMNT_KERB;
541 else if (!strcmp(sec, "krb5i"))
542 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
543 else if (!strcmp(sec, "krb5p"))
544 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
/*
 * Apply the settings in *argp to the mount.
 * mp:       mount whose read-only flag may be updated.
 * nmp:      NFS mount structure that receives the decoded values.
 * argp:     requested arguments; argp->flags may have NFSMNT_RDIRPLUS
 *           cleared here when neither NFSv3 nor NFSv4 is selected.
 * hostname: optional server name copied into nm_hostname; nfs_mount()
 *           passes NULL on the update path.
 * cred, td: NOTE(review): their use is not visible in this listing.
 * If the socket-related settings changed and a client connection already
 * exists, the connection is torn down and, for datagram sockets,
 * re-established in a retry loop.
 */
548 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
549 const char *hostname, struct ucred *cred, struct thread *td)
558 * Set read-only flag if requested; otherwise, clear it if this is
559 * an update. If this is not an update, then either the read-only
560 * flag is already clear, or this is a root mount and it was set
561 * intentionally at some previous point.
563 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
565 mp->mnt_flag |= MNT_RDONLY;
567 } else if (mp->mnt_flag & MNT_UPDATE) {
569 mp->mnt_flag &= ~MNT_RDONLY;
574 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
575 * no sense in that context. Also, set up appropriate retransmit
576 * and soft timeout behavior.
578 if (argp->sotype == SOCK_STREAM) {
579 nmp->nm_flag &= ~NFSMNT_NOCONN;
580 nmp->nm_timeo = NFS_MAXTIMEO;
581 if ((argp->flags & NFSMNT_NFSV4) != 0)
582 nmp->nm_retry = INT_MAX;
584 nmp->nm_retry = NFS_RETRANS_TCP;
587 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
588 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
589 argp->flags &= ~NFSMNT_RDIRPLUS;
590 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
593 /* Re-bind if rsrvd port requested and wasn't on one */
594 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
595 && (argp->flags & NFSMNT_RESVPORT);
596 /* Also re-bind if we're switching to/from a connected UDP socket */
597 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
598 (argp->flags & NFSMNT_NOCONN));
600 /* Update flags atomically. Don't change the lock bits. */
601 nmp->nm_flag = argp->flags | nmp->nm_flag;
/* Convert the requested timeout with NFS_HZ, rounded, and clamp it to [NFS_MINTIMEO, NFS_MAXTIMEO]. */
604 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
605 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
606 if (nmp->nm_timeo < NFS_MINTIMEO)
607 nmp->nm_timeo = NFS_MINTIMEO;
608 else if (nmp->nm_timeo > NFS_MAXTIMEO)
609 nmp->nm_timeo = NFS_MAXTIMEO;
/* A retransmit count is honoured only when greater than 1; it is capped at NFS_MAXREXMIT. */
612 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
613 nmp->nm_retry = argp->retrans;
614 if (nmp->nm_retry > NFS_MAXREXMIT)
615 nmp->nm_retry = NFS_MAXREXMIT;
618 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
619 nmp->nm_wsize = argp->wsize;
621 * Clip at the power of 2 below the size. There is an
622 * issue (not isolated) that causes intermittent page
623 * faults if this is not done.
625 if (nmp->nm_wsize > NFS_FABLKSIZE)
626 nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
628 nmp->nm_wsize = NFS_FABLKSIZE;
631 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
632 nmp->nm_rsize = argp->rsize;
634 * Clip at the power of 2 below the size. There is an
635 * issue (not isolated) that causes intermittent page
636 * faults if this is not done.
638 if (nmp->nm_rsize > NFS_FABLKSIZE)
639 nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
641 nmp->nm_rsize = NFS_FABLKSIZE;
644 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
645 nmp->nm_readdirsize = argp->readdirsize;
/*
 * Attribute cache timeouts: take the caller's value when its flag is set
 * and the value is non-negative; the NFS_*ATTRTIMO assignments are the
 * alternatives (the separating lines are not visible in this listing).
 */
648 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
649 nmp->nm_acregmin = argp->acregmin;
651 nmp->nm_acregmin = NFS_MINATTRTIMO;
652 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
653 nmp->nm_acregmax = argp->acregmax;
655 nmp->nm_acregmax = NFS_MAXATTRTIMO;
656 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
657 nmp->nm_acdirmin = argp->acdirmin;
659 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
660 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
661 nmp->nm_acdirmax = argp->acdirmax;
663 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
/* Keep each minimum at or below its maximum. */
664 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
665 nmp->nm_acdirmin = nmp->nm_acdirmax;
666 if (nmp->nm_acregmin > nmp->nm_acregmax)
667 nmp->nm_acregmin = nmp->nm_acregmax;
/* Read-ahead is capped at NFS_MAXRAHEAD. */
669 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
670 if (argp->readahead <= NFS_MAXRAHEAD)
671 nmp->nm_readahead = argp->readahead;
673 nmp->nm_readahead = NFS_MAXRAHEAD;
/* The write commit size is never set below the write size. */
675 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
676 if (argp->wcommitsize < nmp->nm_wsize)
677 nmp->nm_wcommitsize = nmp->nm_wsize;
679 nmp->nm_wcommitsize = argp->wcommitsize;
/* A change of socket type or protocol also requires the socket to be redone. */
682 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
683 (nmp->nm_soproto != argp->proto));
685 if (nmp->nm_client != NULL && adjsock) {
686 int haslock = 0, error = 0;
/* Stream sockets take the send lock before the disconnect. */
688 if (nmp->nm_sotype == SOCK_STREAM) {
689 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
694 newnfs_disconnect(&nmp->nm_sockreq);
696 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
697 nmp->nm_sotype = argp->sotype;
698 nmp->nm_soproto = argp->proto;
/* Datagram sockets are reconnected here, napping between attempts. */
699 if (nmp->nm_sotype == SOCK_DGRAM)
700 while (newnfs_connect(nmp, &nmp->nm_sockreq,
702 printf("newnfs_args: retrying connect\n");
703 (void) nfs_catnap(PSOCK, 0, "newnfscon");
707 nmp->nm_sotype = argp->sotype;
708 nmp->nm_soproto = argp->proto;
/* Record the server name; the ':' located below is handled on lines not visible here. */
711 if (hostname != NULL) {
712 strlcpy(nmp->nm_hostname, hostname,
713 sizeof(nmp->nm_hostname));
714 p = strchr(nmp->nm_hostname, ':');
/*
 * Mount option names; nfs_mount() hands this table to vfs_filteropt()
 * before looking at any option.
 * NOTE(review): nfs_mount() also looks up "noresvport", which is not among
 * the entries visible in this listing -- confirm it is present in the full
 * table (the end of the initializer is not visible here).
 */
720 static const char *nfs_opts[] = { "from", "nfs_args",
721 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
722 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
723 "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
724 "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
725 "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
726 "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
727 "principal", "nfsv4", "gssname", "allgssname", "dirpath",
728 "nametimeo", "negnametimeo", "nocto", "noncontigwr", "wcommitsize",
735 * It seems a bit dumb to copyinstr() the host and path here and then
736 * bcopy() them in mountnfs(), but I wanted to detect errors before
737 * doing the sockargs() call because sockargs() allocates an mbuf and
738 * an error after that means that I have to release the mbuf.
/*
 * VFS mount handler for NFS.
 * mp: the mount being created or updated; options are read from
 *     mp->mnt_optnew.
 * Flow visible here: reject unknown options, divert root mounts to
 * nfs_mountroot(), build a struct nfs_args either from a binary "nfs_args"
 * option or from individual string options, handle MNT_UPDATE through
 * nfs_decode_args(), otherwise gather file handle, host name, Kerberos
 * names, directory path and server address and call mountnfs().
 * The error value is accumulated in "error"; the exit path and return
 * statement are not visible in this listing.
 */
742 nfs_mount(struct mount *mp)
/* Defaults used for any setting the caller does not supply. */
744 struct nfs_args args = {
745 .version = NFS_ARGSVERSION,
747 .addrlen = sizeof (struct sockaddr_in),
748 .sotype = SOCK_STREAM,
752 .flags = NFSMNT_RESVPORT,
755 .readdirsize = NFS_READDIRSIZE,
757 .retrans = NFS_RETRANS,
758 .readahead = NFS_DEFRAHEAD,
759 .wcommitsize = 0, /* was: NQ_DEFLEASE */
761 .acregmin = NFS_MINATTRTIMO,
762 .acregmax = NFS_MAXATTRTIMO,
763 .acdirmin = NFS_MINDIRATTRTIMO,
764 .acdirmax = NFS_MAXDIRATTRTIMO,
766 int error = 0, ret, len;
767 struct sockaddr *nam = NULL;
771 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
772 char *cp, *opt, *name, *secname;
773 int nametimeo = NFS_DEFAULT_NAMETIMEO;
774 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
775 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
778 has_nfs_args_opt = 0;
/* Only option names listed in nfs_opts are accepted. */
779 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
/* A root mount that is not an update is handled entirely by nfs_mountroot(). */
785 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
786 error = nfs_mountroot(mp);
793 * The old mount_nfs program passed the struct nfs_args
794 * from userspace to kernel. The new mount_nfs program
795 * passes string options via nmount() from userspace to kernel
796 * and we populate the struct nfs_args in the kernel.
798 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
799 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
/* A binary nfs_args with any other version is rejected. */
804 if (args.version != NFS_ARGSVERSION) {
805 error = EPROGMISMATCH;
808 has_nfs_args_opt = 1;
811 /* Handle the new style options. */
812 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
813 args.flags |= NFSMNT_NOCONN;
/* "conn" is the inverse of "noconn": it clears the flag rather than setting it. */
814 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
815 args.flags &= ~NFSMNT_NOCONN;
816 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
817 args.flags |= NFSMNT_NOLOCKD;
818 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
819 args.flags &= ~NFSMNT_NOLOCKD;
820 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
821 args.flags |= NFSMNT_INT;
822 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
823 args.flags |= NFSMNT_RDIRPLUS;
824 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
825 args.flags |= NFSMNT_RESVPORT;
826 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
827 args.flags &= ~NFSMNT_RESVPORT;
828 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
829 args.flags |= NFSMNT_SOFT;
830 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
831 args.flags &= ~NFSMNT_SOFT;
/* Transport selection; "tcp" is tested after "mntudp"/"udp", so it wins when both are given. */
832 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
833 args.sotype = SOCK_DGRAM;
834 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
835 args.sotype = SOCK_DGRAM;
836 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
837 args.sotype = SOCK_STREAM;
838 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
839 args.flags |= NFSMNT_NFSV3;
/* Selecting NFSv4 also forces a stream socket. */
840 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
841 args.flags |= NFSMNT_NFSV4;
842 args.sotype = SOCK_STREAM;
844 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
845 args.flags |= NFSMNT_ALLGSSNAME;
846 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
847 args.flags |= NFSMNT_NOCTO;
848 if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
849 args.flags |= NFSMNT_NONCONTIGWR;
/*
 * Numeric options: each is parsed with sscanf("%d"), range-checked, and on
 * success its NFSMNT_* flag is set so nfs_decode_args() will honour it.
 */
850 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
852 vfs_mount_error(mp, "illegal readdirsize");
856 ret = sscanf(opt, "%d", &args.readdirsize);
857 if (ret != 1 || args.readdirsize <= 0) {
858 vfs_mount_error(mp, "illegal readdirsize: %s",
863 args.flags |= NFSMNT_READDIRSIZE;
865 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
867 vfs_mount_error(mp, "illegal readahead");
871 ret = sscanf(opt, "%d", &args.readahead);
872 if (ret != 1 || args.readahead <= 0) {
873 vfs_mount_error(mp, "illegal readahead: %s",
878 args.flags |= NFSMNT_READAHEAD;
880 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
882 vfs_mount_error(mp, "illegal wsize");
886 ret = sscanf(opt, "%d", &args.wsize);
887 if (ret != 1 || args.wsize <= 0) {
888 vfs_mount_error(mp, "illegal wsize: %s",
893 args.flags |= NFSMNT_WSIZE;
895 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
897 vfs_mount_error(mp, "illegal rsize");
901 ret = sscanf(opt, "%d", &args.rsize);
902 if (ret != 1 || args.rsize <= 0) {
/* Report the option that actually failed (this message used to say wsize). */
903 vfs_mount_error(mp, "illegal rsize: %s",
908 args.flags |= NFSMNT_RSIZE;
910 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
912 vfs_mount_error(mp, "illegal retrans");
916 ret = sscanf(opt, "%d", &args.retrans);
917 if (ret != 1 || args.retrans <= 0) {
918 vfs_mount_error(mp, "illegal retrans: %s",
923 args.flags |= NFSMNT_RETRANS;
925 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
926 ret = sscanf(opt, "%d", &args.acregmin);
927 if (ret != 1 || args.acregmin < 0) {
928 vfs_mount_error(mp, "illegal acregmin: %s",
933 args.flags |= NFSMNT_ACREGMIN;
935 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
936 ret = sscanf(opt, "%d", &args.acregmax);
937 if (ret != 1 || args.acregmax < 0) {
938 vfs_mount_error(mp, "illegal acregmax: %s",
943 args.flags |= NFSMNT_ACREGMAX;
945 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
946 ret = sscanf(opt, "%d", &args.acdirmin);
947 if (ret != 1 || args.acdirmin < 0) {
948 vfs_mount_error(mp, "illegal acdirmin: %s",
953 args.flags |= NFSMNT_ACDIRMIN;
955 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
956 ret = sscanf(opt, "%d", &args.acdirmax);
957 if (ret != 1 || args.acdirmax < 0) {
958 vfs_mount_error(mp, "illegal acdirmax: %s",
963 args.flags |= NFSMNT_ACDIRMAX;
965 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
966 ret = sscanf(opt, "%d", &args.wcommitsize);
967 if (ret != 1 || args.wcommitsize < 0) {
968 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
972 args.flags |= NFSMNT_WCOMMITSIZE;
974 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
975 ret = sscanf(opt, "%d", &args.timeo);
976 if (ret != 1 || args.timeo <= 0) {
977 vfs_mount_error(mp, "illegal timeout: %s",
982 args.flags |= NFSMNT_TIMEO;
/* The name cache timeouts are not part of nfs_args; they go to mountnfs() directly. */
984 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
985 ret = sscanf(opt, "%d", &nametimeo);
986 if (ret != 1 || nametimeo < 0) {
987 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
992 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
994 ret = sscanf(opt, "%d", &negnametimeo);
995 if (ret != 1 || negnametimeo < 0) {
996 vfs_mount_error(mp, "illegal negnametimeo: %s",
1002 if (vfs_getopt(mp->mnt_optnew, "sec",
1003 (void **) &secname, NULL) == 0)
1004 nfs_sec_name(secname, &args.flags);
1006 if (mp->mnt_flag & MNT_UPDATE) {
1007 struct nfsmount *nmp = VFSTONFS(mp);
1015 * If a change from TCP->UDP is done and there are thread(s)
1016 * that have I/O RPC(s) in progress with a transfer size
1017 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1018 * hung, retrying the RPC(s) forever. Usually these threads
1019 * will be seen doing an uninterruptible sleep on wait channel
1020 * "newnfsreq" (truncated to "newnfsre" by procstat).
1022 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1023 tprintf(td->td_proc, LOG_WARNING,
1024 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1027 * When doing an update, we can't change version,
1028 * security, switch lockd strategies or change cookie
1031 args.flags = (args.flags &
1037 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1044 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1045 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1050 * Make the nfs_ip_paranoia sysctl serve as the default connection
1051 * or no-connection mode for those protocols that support
1052 * no-connection mode (the flag will be cleared later for protocols
1053 * that do not support no-connection mode). This will allow a client
1054 * to receive replies from a different IP than the request was
1055 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1058 if (nfs_ip_paranoia == 0)
1059 args.flags |= NFSMNT_NOCONN;
1061 if (has_nfs_args_opt != 0) {
1063 * In the 'nfs_args' case, the pointers in the args
1064 * structure are in userland - we copy them in here.
1066 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1067 vfs_mount_error(mp, "Bad file handle");
1071 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1075 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
/* Zero the remainder of hst after the copied-in string. */
1078 bzero(&hst[hstlen], MNAMELEN - hstlen);
1079 args.hostname = hst;
1080 /* sockargs() call must be after above copyin() calls */
1081 error = getsockaddr(&nam, (caddr_t)args.addr,
/* String-option case: file handle and host name come from mount options. */
1086 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1087 &args.fhsize) == 0) {
1088 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1089 vfs_mount_error(mp, "Bad file handle");
1093 bcopy(args.fh, nfh, args.fhsize);
1097 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1098 (void **)&args.hostname, &len);
1099 if (args.hostname == NULL) {
1100 vfs_mount_error(mp, "Invalid hostname");
/*
 * Copy no more than the option actually holds (len was filled in by the
 * vfs_getopt() call above) and no more than fits in hst, then terminate.
 * Copying a fixed MNAMELEN bytes could read past a shorter option value.
 */
1104 bcopy(args.hostname, hst, imin(len, MNAMELEN - 1));
1105 hst[imin(len, MNAMELEN - 1)] = '\0';
/* Server principal: explicit "principal" option, or "nfs@" plus the host name. */
1108 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1109 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1111 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1112 cp = strchr(srvkrbname, ':');
1116 srvkrbnamelen = strlen(srvkrbname);
1118 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1119 strlcpy(krbname, name, sizeof (krbname));
1122 krbnamelen = strlen(krbname);
1124 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1125 strlcpy(dirpath, name, sizeof (dirpath));
1128 dirlen = strlen(dirpath);
/* String-option case: the server address comes from the "addr" option. */
1130 if (has_nfs_args_opt == 0) {
1131 if (vfs_getopt(mp->mnt_optnew, "addr",
1132 (void **)&args.addr, &args.addrlen) == 0) {
1133 if (args.addrlen > SOCK_MAXADDRLEN) {
1134 error = ENAMETOOLONG;
1137 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1138 bcopy(args.addr, nam, args.addrlen);
1139 nam->sa_len = args.addrlen;
1141 vfs_mount_error(mp, "No server address");
1148 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1149 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1150 nametimeo, negnametimeo);
1154 mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
1166 * It seems a bit dumb to copyinstr() the host and path here and then
1167 * bcopy() them in mountnfs(), but I wanted to detect errors before
1168 * doing the sockargs() call because sockargs() allocates an mbuf and
1169 * an error after that means that I have to release the mbuf.
/*
 * nfs_cmount: mount entry point taking a user-supplied nfs_args structure.
 *
 * ma    - mount argument list that the copied-in arguments are appended to.
 * data  - source address handed to copyin(); one struct nfs_args is read
 *         from it.
 * flags - mount flags, forwarded unchanged to kernel_mount().
 *
 * The copied-in structure is attached to the argument list under the
 * nfs_args option name and the list is then passed to kernel_mount().
 * NOTE(review): the check of the copyin() result and the return statement
 * are not visible in this view; confirm that a copyin() failure returns
 * before mount_arg() is reached.
 */
1173 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1176 struct nfs_args args;
1178 error = copyin(data, &args, sizeof (struct nfs_args));
1182 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1184 error = kernel_mount(ma, flags);
1189 * Common code for mount and mountroot
/*
 * mountnfs: build and initialise the per-mount nfsmount structure, connect
 * to the server and obtain the root vnode.
 *
 * argp                      - decoded mount arguments (flags, file handle,
 *                             socket type and protocol are read here).
 * mp                        - the mount point being set up.
 * nam                       - server address; freed with FREE(.., M_SONAME)
 *                             on the MNT_UPDATE path and in the teardown
 *                             sequence at the end of the function.
 * hst                       - host string, copied into
 *                             mp->mnt_stat.f_mntfromname (MNAMELEN bytes).
 * krbname/krbnamelen,
 * dirpath/dirlen,
 * srvkrbname/srvkrbnamelen  - three names copied into the variable-length
 *                             tail of the nfsmount structure.
 * vpp                       - receives the root vnode.
 * cred, td                  - credential (a reference is taken with
 *                             crhold()) and thread used for the RPCs.
 * nametimeo, negnametimeo   - defaults stored before nfs_decode_args()
 *                             runs, so that options decoded there are not
 *                             overwritten.
 *
 * NOTE(review): several lines of this function (returns, else branches,
 * labels) are not visible in this view; the comments below describe only
 * the statements that can be seen.
 */
1192 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1193 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1194 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1195 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo)
1197 struct nfsmount *nmp;
1199 int error, trycnt, ret;
1200 struct nfsvattr nfsva;
1201 static u_int64_t clval = 0;
1203 if (mp->mnt_flag & MNT_UPDATE) {
1205 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1206 FREE(nam, M_SONAME);
/*
 * Allocate the mount structure together with trailing space for the three
 * names (sum of the three lengths plus 2 extra bytes); M_ZERO clears it.
 */
1209 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1210 krbnamelen + dirlen + srvkrbnamelen + 2,
1211 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1212 TAILQ_INIT(&nmp->nm_bufq);
1214 clval = (u_int64_t)nfsboottime.tv_sec;
/* clval is function-static: each mount takes the current value, then bumps it. */
1215 nmp->nm_clval = clval++;
1216 nmp->nm_krbnamelen = krbnamelen;
1217 nmp->nm_dirpathlen = dirlen;
1218 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1219 if (td->td_ucred->cr_uid != (uid_t)0) {
1221 * nm_uid is used to get KerberosV credentials for
1222 * the nfsv4 state handling operations if there is
1223 * no host based principal set. Use the uid of
1224 * this user if not root, since they are doing the
1225 * mount. I don't think setting this for root will
1226 * work, since root normally does not have user
1227 * credentials in a credentials cache.
1229 nmp->nm_uid = td->td_ucred->cr_uid;
1232 * Just set to -1, so it won't be used.
1234 nmp->nm_uid = (uid_t)-1;
1237 /* Copy and null terminate all the names */
1238 if (nmp->nm_krbnamelen > 0) {
1239 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1240 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1242 if (nmp->nm_dirpathlen > 0) {
1243 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1244 nmp->nm_dirpathlen);
1245 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1248 if (nmp->nm_srvkrbnamelen > 0) {
1249 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1250 nmp->nm_srvkrbnamelen);
1251 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1252 + nmp->nm_srvkrbnamelen + 2] = '\0';
1254 nmp->nm_sockreq.nr_cred = crhold(cred);
1255 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1257 nmp->nm_getinfo = nfs_getnlminfo;
1258 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1261 nmp->nm_mountp = mp;
1262 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1265 * Since nfs_decode_args() might optionally set them, these
1266 * need to be set to defaults before the call, so that the
1267 * optional settings aren't overwritten.
1269 nmp->nm_nametimeo = nametimeo;
1270 nmp->nm_negnametimeo = negnametimeo;
1271 nmp->nm_timeo = NFS_TIMEO;
1272 nmp->nm_retry = NFS_RETRANS;
1273 nmp->nm_readahead = NFS_DEFRAHEAD;
1275 /* This is empirical approximation of sqrt(hibufspace) * 256. */
1276 nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
/* Double the value until its square reaches hibufspace, then scale by 256. */
1277 while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1278 nmp->nm_wcommitsize *= 2;
1279 nmp->nm_wcommitsize *= 256;
1282 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1285 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1286 * high, depending on whether we end up with negative offsets in
1287 * the client or server somewhere. 2GB-1 may be safer.
1289 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1290 * that we can handle until we find out otherwise.
1291 * XXX Our "safe" limit on the client is what we can store in our
1292 * buffer cache using signed(!) block numbers.
1294 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1295 nmp->nm_maxfilesize = 0xffffffffLL;
1297 nmp->nm_maxfilesize = OFF_MAX;
1299 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1300 nmp->nm_wsize = NFS_WSIZE;
1301 nmp->nm_rsize = NFS_RSIZE;
1302 nmp->nm_readdirsize = NFS_READDIRSIZE;
1304 nmp->nm_numgrps = NFS_MAXGRPS;
1305 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1306 if (nmp->nm_tprintf_delay < 0)
1307 nmp->nm_tprintf_delay = 0;
1308 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1309 if (nmp->nm_tprintf_initial_delay < 0)
1310 nmp->nm_tprintf_initial_delay = 0;
1311 nmp->nm_fhsize = argp->fhsize;
1312 if (nmp->nm_fhsize > 0)
1313 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1314 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1316 /* Set up the sockets and per-host congestion */
1317 nmp->nm_sotype = argp->sotype;
1318 nmp->nm_soproto = argp->proto;
1319 nmp->nm_sockreq.nr_prog = NFS_PROG;
1320 if ((argp->flags & NFSMNT_NFSV4))
1321 nmp->nm_sockreq.nr_vers = NFS_VER4;
1322 else if ((argp->flags & NFSMNT_NFSV3))
1323 nmp->nm_sockreq.nr_vers = NFS_VER3;
1325 nmp->nm_sockreq.nr_vers = NFS_VER2;
/* A non-zero result from newnfs_connect() is treated as an error. */
1328 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1332 * A reference count is needed on the nfsnode representing the
1333 * remote root. If this object is not persistent, then backward
1334 * traversals of the mount point (i.e. "..") will not work if
1335 * the nfsnode gets flushed out of the cache. Ufs does not have
1336 * this problem, because one can identify root inodes by their
1337 * number == ROOTINO (2).
1339 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1340 nmp->nm_dirpathlen > 0) {
1342 * If the fhsize on the mount point == 0 for V4, the mount
1343 * path needs to be looked up.
1347 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1350 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1351 } while (error && --trycnt > 0);
1353 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1357 if (nmp->nm_fhsize > 0) {
1359 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1360 * non-zero for the root vnode. f_iosize will be set correctly
1361 * by nfs_statfs() before any I/O occurs.
1363 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1364 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1371 * Get file attributes and transfer parameters for the
1372 * mountpoint. This has the side effect of filling in
1373 * (*vpp)->v_type with the correct value.
1375 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1376 cred, td, &nfsva, NULL);
1379 * Just set default values to get things going.
1381 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1382 nfsva.na_vattr.va_type = VDIR;
1383 nfsva.na_vattr.va_mode = 0777;
1384 nfsva.na_vattr.va_nlink = 100;
1385 nfsva.na_vattr.va_uid = (uid_t)0;
1386 nfsva.na_vattr.va_gid = (gid_t)0;
1387 nfsva.na_vattr.va_fileid = 2;
1388 nfsva.na_vattr.va_gen = 1;
1389 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1390 nfsva.na_vattr.va_size = 512 * 1024;
1392 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1393 if (argp->flags & NFSMNT_NFSV3)
1394 ncl_fsinfo(nmp, *vpp, cred, td);
1396 /* Mark if the mount point supports NFSv4 ACLs. */
1397 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1399 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1401 mp->mnt_flag |= MNT_NFS4ACLS;
1406 * Lose the lock but keep the ref.
1408 NFSVOPUNLOCK(*vpp, 0);
/*
 * Teardown sequence: disconnect the socket, drop the credential reference
 * taken with crhold(), destroy both mutexes, then free the mount structure
 * and the server address.
 * NOTE(review): presumably the error exit path; its label is not visible
 * in this view.
 */
1414 newnfs_disconnect(&nmp->nm_sockreq);
1415 crfree(nmp->nm_sockreq.nr_cred);
1416 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1417 mtx_destroy(&nmp->nm_mtx);
1418 FREE(nmp, M_NEWNFSMNT);
1419 FREE(nam, M_SONAME);
1424 * unmount system call
/*
 * nfs_unmount: tear down an NFS mount.
 *
 * mp       - the mount being removed.
 * mntflags - unmount flags; MNT_FORCE selects the forced path.
 *
 * Visible sequence:
 *  - MNT_FORCE adds FORCECLOSE to the flags handed to vflush().
 *  - In the forced case outstanding requests are cancelled with
 *    newnfs_nmcancelreqs() and nfscl_umount() is called before the flush.
 *  - vflush() is called with one extra reference allowed for the root
 *    vnode; in the forced case a failing flush is retried, sleeping in
 *    nfs_catnap() between attempts, until trycnt reaches 30.
 *  - In the non-forced case nfscl_umount() is called after the flush.
 *  - Any nfsiod slot still assigned to this mount is released while holding
 *    ncl_iod_mutex.
 *  - The socket is disconnected, the credential reference dropped, the
 *    server address freed, both mutexes destroyed and the mount structure
 *    freed.
 * NOTE(review): the error exits and the return statement are not visible
 * in this view.
 */
1427 nfs_unmount(struct mount *mp, int mntflags)
1430 struct nfsmount *nmp;
1431 int error, flags = 0, i, trycnt = 0;
1435 if (mntflags & MNT_FORCE)
1436 flags |= FORCECLOSE;
1439 * Goes something like this..
1440 * - Call vflush() to clear out vnodes for this filesystem
1441 * - Close the socket
1442 * - Free up the data structures
1444 /* In the forced case, cancel any outstanding requests. */
1445 if (mntflags & MNT_FORCE) {
1446 error = newnfs_nmcancelreqs(nmp);
1449 /* For a forced close, get rid of the renew thread now */
1450 nfscl_umount(nmp, td);
1452 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1454 error = vflush(mp, 1, flags, td);
1455 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1456 (void) nfs_catnap(PSOCK, error, "newndm");
1457 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1462 * We are now committed to the unmount.
1464 if ((mntflags & MNT_FORCE) == 0)
1465 nfscl_umount(nmp, td);
1466 /* Make sure no nfsiods are assigned to this mount. */
1467 mtx_lock(&ncl_iod_mutex);
1468 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1469 if (ncl_iodmount[i] == nmp) {
1470 ncl_iodwant[i] = NFSIOD_AVAILABLE;
1471 ncl_iodmount[i] = NULL;
1473 mtx_unlock(&ncl_iod_mutex);
1474 newnfs_disconnect(&nmp->nm_sockreq);
1475 crfree(nmp->nm_sockreq.nr_cred);
1476 FREE(nmp->nm_nam, M_SONAME);
1478 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1479 mtx_destroy(&nmp->nm_mtx);
1480 FREE(nmp, M_NEWNFSMNT);
1486 * Return root of a filesystem
/*
 * nfs_root: look up the root vnode of an NFS mount.
 *
 * mp    - the mount whose root is wanted.
 * flags - passed through to ncl_nget().
 * vpp   - output vnode pointer (its assignment is not visible in this view).
 *
 * The root nfsnode is obtained from the file handle stored in the mount
 * structure.  While holding nm_mtx the mount is checked for being NFSv3
 * without fsinfo yet; in that case the mutex is released and ncl_fsinfo()
 * is called with the current thread and its credential.  The vnode is then
 * flagged VV_ROOT.
 * NOTE(review): the statement controlled by the VNON test and the else
 * keyword pairing the second mtx_unlock() are not visible here.
 */
1489 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1492 struct nfsmount *nmp;
1497 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1502 * Get transfer parameters and attributes for root vnode once.
1504 mtx_lock(&nmp->nm_mtx);
1505 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1506 mtx_unlock(&nmp->nm_mtx);
1507 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1509 mtx_unlock(&nmp->nm_mtx);
1510 if (vp->v_type == VNON)
1512 vp->v_vflag |= VV_ROOT;
1518 * Flush out the buffer cache
/*
 * nfs_sync: push dirty buffers of every vnode on the mount to the server.
 *
 * mp      - the mount to sync.
 * waitfor - sync mode; it is compared against MNT_LAZY in the per-vnode
 *           test and is also handed to VOP_FSYNC().
 *
 * A mount with MNTK_UNMOUNTF set is handled first, before any vnode is
 * visited.  Otherwise all vnodes are walked with MNT_VNODE_FOREACH_ALL.
 * The per-vnode test matches vnodes that are already locked, have no dirty
 * buffers, or are seen during a lazy sync.  The remaining vnodes are
 * acquired exclusively with vget(), flushed with VOP_FSYNC() and unlocked.
 * A vget() failure aborts the current iteration via
 * MNT_VNODE_FOREACH_ALL_ABORT.
 * NOTE(review): the statements executed when the per-vnode test matches
 * (presumably skipping the vnode) and the error accumulation into allerror
 * are not visible in this view.
 */
1522 nfs_sync(struct mount *mp, int waitfor)
1524 struct vnode *vp, *mvp;
1526 int error, allerror = 0;
1532 * If a forced dismount is in progress, return from here so that
1533 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1534 * calling VFS_UNMOUNT().
1536 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1543 * Force stale buffer cache information to be flushed.
1546 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1547 /* XXX Racy bv_cnt check. */
1548 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1549 waitfor == MNT_LAZY) {
1553 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1554 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1557 error = VOP_FSYNC(vp, waitfor, td);
1560 NFSVOPUNLOCK(vp, 0);
/*
 * nfs_sysctl: per-mount sysctl handler.
 *
 * mp  - the mount being queried or adjusted.
 * op  - selects the operation (only the VFS_CTL_NOLOCKS label is visible
 *       in this view).
 * req - sysctl request; oldptr and newptr decide whether a value is
 *       reported, updated, or both.
 *
 * Visible operations:
 *  - VFS_CTL_NOLOCKS: reports 1 or 0 depending on NFSMNT_NOLOCKS when
 *    oldptr is set, and reads a new value that sets or clears the flag
 *    when newptr is set.
 *  - A query operation that fills vq: VQ_NOTRESP is set when NFSSTA_TIMEO
 *    is present in nm_state (tested under nm_mtx), VQ_NOTRESPLOCK when
 *    locking is enabled and NFSSTA_LOCKTIMEO is set; vq is then copied out.
 *  - An operation on nm_tprintf_initial_delay: copied out when oldptr is
 *    set; when newptr is set, vfs_suser() is called first, the new value
 *    is read and a negative value is clamped to 0.
 * NOTE(review): the switch statement, remaining case labels, error checks
 * and return are not visible here; the lock-timeout test runs after
 * nm_mtx has been dropped in the visible lines, confirm that is intended.
 */
1567 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1569 struct nfsmount *nmp = VFSTONFS(mp);
1573 bzero(&vq, sizeof(vq));
1576 case VFS_CTL_NOLOCKS:
1577 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1578 if (req->oldptr != NULL) {
1579 error = SYSCTL_OUT(req, &val, sizeof(val));
1583 if (req->newptr != NULL) {
1584 error = SYSCTL_IN(req, &val, sizeof(val));
1588 nmp->nm_flag |= NFSMNT_NOLOCKS;
1590 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1595 mtx_lock(&nmp->nm_mtx);
1596 if (nmp->nm_state & NFSSTA_TIMEO)
1597 vq.vq_flags |= VQ_NOTRESP;
1598 mtx_unlock(&nmp->nm_mtx);
1600 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1601 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1602 vq.vq_flags |= VQ_NOTRESPLOCK;
1604 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1607 if (req->oldptr != NULL) {
1608 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1609 sizeof(nmp->nm_tprintf_initial_delay));
1613 if (req->newptr != NULL) {
1614 error = vfs_suser(mp, req->td);
1617 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1618 sizeof(nmp->nm_tprintf_initial_delay));
1621 if (nmp->nm_tprintf_initial_delay < 0)
1622 nmp->nm_tprintf_initial_delay = 0;
1632 * Extract the information needed by the nlm from the nfs vnode.
/*
 * nfs_getnlminfo: export per-vnode details through the output pointers.
 *
 * vp     - NFS vnode to describe.
 * fhp    - receives the file handle bytes (nfh_len bytes are copied; the
 *          caller must supply a buffer at least that large).
 * fhlenp - receives the file handle length.
 * sp     - receives the server address, truncated to sizeof(*sp) if the
 *          stored sockaddr is longer.
 * is_v3p - receives the result of NFS_ISV3(vp).
 * sizep  - receives the cached file size (n_size).
 * timeop - optional; when non-NULL receives the mount timeout converted
 *          from NFS_HZ ticks into seconds and microseconds.
 */
1635 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1636 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1637 struct timeval *timeop)
1639 struct nfsmount *nmp;
1640 struct nfsnode *np = VTONFS(vp);
1642 nmp = VFSTONFS(vp->v_mount);
1644 *fhlenp = (size_t)np->n_fhp->nfh_len;
1646 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1648 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1650 *is_v3p = NFS_ISV3(vp);
1652 *sizep = np->n_size;
1653 if (timeop != NULL) {
/* Whole ticks become seconds; the remainder is scaled to microseconds. */
1654 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1655 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1660 * This function prints out an option name, based on the conditional
/*
 * nfscl_printopt: conditionally write an option name into the output
 * buffer.
 *
 * nmp     - mount structure (not referenced in the visible lines).
 * testval - the option is written only when this is non-zero.
 * opt     - option text to write.
 * buf     - in/out pointer to the current write position.
 * blen    - in/out count of bytes remaining at *buf.
 *
 * Nothing is written unless *blen is strictly greater than strlen(opt),
 * which leaves room for the terminating NUL.  The snprintf() result is
 * compared against strlen(opt).
 * NOTE(review): the action taken on a length mismatch and the update of
 * *buf and *blen are not visible in this view.
 */
1663 static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
1664 char *opt, char **buf, size_t *blen)
1668 if (testval != 0 && *blen > strlen(opt)) {
1669 len = snprintf(*buf, *blen, "%s", opt);
1670 if (len != strlen(opt))
1678 * This function prints out an option's integer value.
/*
 * nfscl_printoptval: write an option name followed by an equals sign and
 * its integer value into the output buffer.
 *
 * nmp    - mount structure (not referenced in the visible lines).
 * optval - integer value, formatted in decimal.
 * opt    - option name text.
 * buf    - in/out pointer to the current write position.
 * blen   - in/out count of bytes remaining at *buf.
 *
 * The write is attempted only when *blen exceeds strlen(opt) + 1; the
 * digits of the value are not included in that test, so the formatted
 * text can be truncated by snprintf().
 * NOTE(review): the update of *buf and *blen after the snprintf() call is
 * not visible in this view.
 */
1680 static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
1681 char *opt, char **buf, size_t *blen)
1685 if (*blen > strlen(opt) + 1) {
1686 /* Could result in truncated output string. */
1687 len = snprintf(*buf, *blen, "%s=%d", opt, optval);
1696 * Load the option flags and values into the buffer.
/*
 * nfscl_retopts: render the options of a mount as text.
 *
 * nmp    - mount whose flags and tunables are reported.
 * buffer - destination for the option text.
 * buflen - size of the destination in bytes.
 *
 * The visible calls emit, in order: the protocol version (written without
 * a leading comma), the transport, the boolean options derived from
 * nm_flag, the security flavour, and finally the numeric tunables through
 * nfscl_printoptval().  Every entry after the version carries its own
 * leading comma.
 * NOTE(review): several call continuation lines and the set-up of buf and
 * blen are not visible in this view, and the end of the function cannot
 * be confirmed from here.
 */
1698 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1705 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1707 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1709 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1710 "nfsv2", &buf, &blen);
1711 nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1712 nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1713 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1715 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1717 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1719 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1721 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1723 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1725 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1727 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1728 ",noncontigwr", &buf, &blen);
1729 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1730 0, ",lockd", &buf, &blen);
1731 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1732 NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1733 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1735 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1737 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1738 NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1739 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1740 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1742 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1743 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1745 nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1746 nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1747 nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1748 nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1749 nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1750 nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1752 nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1753 nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1754 nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1756 nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1757 nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1759 nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1760 nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);