2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
47 #include <sys/clock.h>
49 #include <sys/limits.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
64 #include <vm/vm_extern.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
77 FEATURE(nfscl, "NFSv4 client");
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
92 SYSCTL_DECL(_vfs_nfs);
93 static int nfs_ip_paranoia = 1;
94 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95 &nfs_ip_paranoia, 0, "");
96 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99 /* how long between console messages "nfs server foo not responding" */
100 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
104 static int nfs_mountroot(struct mount *);
105 static void nfs_sec_name(char *, int *);
106 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107 struct nfs_args *argp, const char *, struct ucred *,
109 static int mountnfs(struct nfs_args *, struct mount *,
110 struct sockaddr *, char *, u_char *, int, u_char *, int,
111 u_char *, int, struct vnode **, struct ucred *,
112 struct thread *, int, int, int);
113 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114 struct sockaddr_storage *, int *, off_t *,
116 static vfs_mount_t nfs_mount;
117 static vfs_cmount_t nfs_cmount;
118 static vfs_unmount_t nfs_unmount;
119 static vfs_root_t nfs_root;
120 static vfs_statfs_t nfs_statfs;
121 static vfs_sync_t nfs_sync;
122 static vfs_sysctl_t nfs_sysctl;
123 static vfs_purge_t nfs_purge;
126 * nfs vfs operations.
/*
 * VFS operations vector for the NFS client.  Each vfs_* slot is bound to
 * the handler of the matching name; the nfs_* handlers are the static
 * functions prototyped earlier in this file, while ncl_init/ncl_uninit
 * are not declared in the visible part of this listing.  The vector is
 * registered under the name "nfs" by the VFS_SET() line that follows.
 */
128 static struct vfsops nfs_vfsops = {
129 .vfs_init = ncl_init,
130 .vfs_mount = nfs_mount,
131 .vfs_cmount = nfs_cmount,
132 .vfs_root = nfs_root,
133 .vfs_statfs = nfs_statfs,
134 .vfs_sync = nfs_sync,
135 .vfs_uninit = ncl_uninit,
136 .vfs_unmount = nfs_unmount,
137 .vfs_sysctl = nfs_sysctl,
138 .vfs_purge = nfs_purge,
140 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
142 /* So that loader and kldload(2) can find us, wherever we are.. */
143 MODULE_VERSION(nfs, 1);
144 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
155 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156 struct nfs_diskless nfs_diskless = { { { 0 } } };
157 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
158 int nfs_diskless_valid = 0;
161 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162 &nfs_diskless_valid, 0,
163 "Has the diskless struct been filled correctly");
165 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
168 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170 "%Ssockaddr_in", "Diskless root nfs address");
173 void newnfsargs_ntoh(struct nfs_args *);
174 static int nfs_mountdiskless(char *,
175 struct sockaddr_in *, struct nfs_args *,
176 struct thread *, struct vnode **, struct mount *);
177 static void nfs_convert_diskless(void);
178 static void nfs_convert_oargs(struct nfs_args *args,
179 struct onfs_args *oargs);
/*
 * newnfs_iosize: clamp the mount's transfer sizes and derive the I/O
 * buffer size.
 *
 * nmp - NFS mount whose nm_rsize, nm_wsize and nm_readdirsize are
 *       adjusted in place.
 *
 * A per-mount ceiling (maxio) is picked from the protocol version flags
 * and, for NFSv3, the socket type.  Read, write and readdir sizes that
 * are zero or above that ceiling are set to it; read and write sizes are
 * additionally capped at MAXBSIZE and the readdir size at the read size.
 * The resulting iosize is stored in the mount's mnt_stat.f_iosize.
 * NOTE(review): the return statement is not visible in this listing;
 * nfs_statfs() assigns this function's result to f_iosize, so it
 * presumably returns iosize -- confirm against the full source.
 */
182 newnfs_iosize(struct nfsmount *nmp)
186 /* First, set the upper limit for iosize */
187 if (nmp->nm_flag & NFSMNT_NFSV4) {
188 maxio = NFS_MAXBSIZE;
189 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 if (nmp->nm_sotype == SOCK_DGRAM)
191 maxio = NFS_MAXDGRAMDATA;
193 maxio = NFS_MAXBSIZE;
195 maxio = NFS_V2MAXDATA;
/* A size of 0 means "not set": fall back to the ceiling. */
197 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 nmp->nm_rsize = maxio;
199 if (nmp->nm_rsize > MAXBSIZE)
200 nmp->nm_rsize = MAXBSIZE;
201 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 nmp->nm_readdirsize = maxio;
/* The readdir size is never allowed to exceed the (clamped) read size. */
203 if (nmp->nm_readdirsize > nmp->nm_rsize)
204 nmp->nm_readdirsize = nmp->nm_rsize;
205 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 nmp->nm_wsize = maxio;
207 if (nmp->nm_wsize > MAXBSIZE)
208 nmp->nm_wsize = MAXBSIZE;
211 * Calculate the size used for io buffers. Use the larger
212 * of the two sizes to minimise nfs requests but make sure
213 * that it is at least one VM page to avoid wasting buffer
216 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 iosize = imax(iosize, PAGE_SIZE);
218 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
/*
 * nfs_convert_oargs: fill a current-format nfs_args from an old-format
 * onfs_args.
 *
 * args  - destination; its version field is stamped NFS_ARGSVERSION.
 * oargs - source structure in the old layout.
 *
 * Only the fields listed below are copied.  Pointer members (addr, fh,
 * hostname) are copied as pointers, not duplicated.  Any nfs_args field
 * not assigned here (for example the attribute-cache timeouts used
 * elsewhere in this file) is left as the caller provided it.
 */
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
226 args->version = NFS_ARGSVERSION;
227 args->addr = oargs->addr;
228 args->addrlen = oargs->addrlen;
229 args->sotype = oargs->sotype;
230 args->proto = oargs->proto;
231 args->fh = oargs->fh;
232 args->fhsize = oargs->fhsize;
233 args->flags = oargs->flags;
234 args->wsize = oargs->wsize;
235 args->rsize = oargs->rsize;
236 args->readdirsize = oargs->readdirsize;
237 args->timeo = oargs->timeo;
238 args->retrans = oargs->retrans;
239 args->readahead = oargs->readahead;
240 args->hostname = oargs->hostname;
/*
 * nfs_convert_diskless: populate the global nfsv3_diskless from the
 * global old-format nfs_diskless.
 *
 * Copies the interface request, gateway, root server address, root host
 * name, root time and local host name, and converts the root mount
 * arguments with nfs_convert_oargs().  The root file handle is copied
 * with length NFSX_MYFH when the converted flags contain NFSMNT_NFSV3,
 * otherwise with length NFSX_V2FH.  Finally nfs_diskless_valid is set
 * to 3; nfs_mountroot() calls this function when that variable is 1.
 */
244 nfs_convert_diskless(void)
247 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 sizeof(struct ifaliasreq));
249 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 sizeof(struct sockaddr_in));
251 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
/* File handle size depends on the protocol version recorded in the flags. */
252 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
256 nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
259 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 sizeof(struct sockaddr_in));
261 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 nfsv3_diskless.root_time = nfs_diskless.root_time;
263 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
265 nfs_diskless_valid = 3;
/*
 * nfs_statfs: VFS statfs handler (installed as .vfs_statfs in
 * nfs_vfsops).
 *
 * mp  - the NFS mount being queried.
 * sbp - statfs buffer to fill; may be the mount's own mnt_stat.
 *
 * Visible flow: busy the mount without waiting, obtain the node for the
 * mount's root file handle, issue an fsinfo RPC when the mount is NFSv3
 * and has no fsinfo yet, issue the statfs RPC, load the returned (or
 * substituted default) attributes into the attribute cache, then fill
 * *sbp under nm_mtx.
 * NOTE(review): the error checks, cleanup and return lines between
 * these calls are not visible in this listing; the conditions guarding
 * each step must be confirmed against the full source.
 */
272 nfs_statfs(struct mount *mp, struct statfs *sbp)
276 struct nfsmount *nmp = VFSTONFS(mp);
277 struct nfsvattr nfsva;
280 int error = 0, attrflag, gotfsinfo = 0, ret;
285 error = vfs_busy(mp, MBF_NOWAIT);
288 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
/* nm_mtx is held only for the flag test and dropped before any RPC. */
294 mtx_lock(&nmp->nm_mtx);
295 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296 mtx_unlock(&nmp->nm_mtx);
297 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
302 mtx_unlock(&nmp->nm_mtx);
304 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
307 NFSCL_DEBUG(2, "statfs=%d\n", error);
/*
 * Attribute fallback: a separate getattr by file handle and, below it,
 * hard-coded directory attributes.  NOTE(review): the conditions that
 * select each fallback are on lines missing from this listing.
 */
309 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310 td->td_ucred, td, &nfsva, NULL, NULL);
313 * Just set default values to get things going.
315 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316 nfsva.na_vattr.va_type = VDIR;
317 nfsva.na_vattr.va_mode = 0777;
318 nfsva.na_vattr.va_nlink = 100;
319 nfsva.na_vattr.va_uid = (uid_t)0;
320 nfsva.na_vattr.va_gid = (gid_t)0;
321 nfsva.na_vattr.va_fileid = 2;
322 nfsva.na_vattr.va_gen = 1;
323 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324 nfsva.na_vattr.va_size = 512 * 1024;
327 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
/* Publish the results into the mount and *sbp while holding nm_mtx. */
329 mtx_lock(&nmp->nm_mtx);
330 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331 nfscl_loadfsinfo(nmp, &fs);
332 nfscl_loadsbinfo(nmp, &sb, sbp);
333 sbp->f_iosize = newnfs_iosize(nmp);
334 mtx_unlock(&nmp->nm_mtx);
/* A caller-supplied buffer also needs the mount point and source names. */
335 if (sbp != &mp->mnt_stat) {
336 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
339 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
/* NOTE(review): this else pairs with a condition on a line not shown here. */
340 } else if (NFS_ISV4(vp)) {
341 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349 * nfs version 3 fsinfo rpc call
/*
 * ncl_fsinfo: issue an fsinfo RPC for vp using cred and load the result
 * into the mount.
 *
 * The attributes returned with the reply are passed to
 * nfscl_loadattrcache(), and the fsinfo data is stored into nmp by
 * nfscl_loadfsinfo() while nm_mtx is held.
 * NOTE(review): the remaining parameters, the conditions guarding these
 * two calls and the return value are on lines missing from this listing.
 */
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
356 struct nfsvattr nfsva;
359 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
362 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
364 mtx_lock(&nmp->nm_mtx);
365 nfscl_loadfsinfo(nmp, &fs);
366 mtx_unlock(&nmp->nm_mtx);
372 * Mount a remote root fs via nfs. This depends on the info in the
373 * nfs_diskless structure that has been filled in properly by some primary
375 * It goes something like this:
376 * - do enough of "ifconfig" by calling ifioctl() so that the system
377 * can talk to the server
378 * - If nfs_diskless.mygateway is filled in, use that address as
380 * - build the rootfs mount point and call mountnfs() to do the rest.
382 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383 * structure, as well as other global NFS client variables here, as
384 * nfs_mountroot() will be called once in the boot before any other NFS
385 * client activity occurs.
388 nfs_mountroot(struct mount *mp)
390 struct thread *td = curthread;
391 struct nfsv3_diskless *nd = &nfsv3_diskless;
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 bootpc_init(); /* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 nfs_setup_diskless();
406 if (nfs_diskless_valid == 0)
408 if (nfs_diskless_valid == 1)
409 nfs_convert_diskless();
412 * XXX splnet, so networks will receive...
417 * Do enough of ifconfig(8) so that the critical net interface can
418 * talk to the server.
420 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
423 panic("nfs_mountroot: socreate(%04x): %d",
424 nd->myif.ifra_addr.sa_family, error);
426 #if 0 /* XXX Bad idea */
428 * We might not have been told the right interface, so we pass
429 * over the first ten interfaces of the same kind, until we get
430 * one of them configured.
433 for (i = strlen(nd->myif.ifra_name) - 1;
434 nd->myif.ifra_name[i] >= '0' &&
435 nd->myif.ifra_name[i] <= '9';
436 nd->myif.ifra_name[i] ++) {
437 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
442 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
/*
 * Optional MTU override: when the "boot.netif.mtu" kernel environment
 * variable is set, parse it as a decimal number and apply it to the boot
 * interface (nd->myif.ifra_name) with SIOCSIFMTU.  The diagnostic below
 * now ends in a newline so that it does not run into the next console
 * message, matching the other printf() calls in this file.
 */
445 if ((cp = getenv("boot.netif.mtu")) != NULL) {
446 ir.ifr_mtu = strtol(cp, NULL, 10);
447 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
449 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
451 printf("nfs_mountroot: SIOCSIFMTU: %d\n", error);
456 * If the gateway field is filled in, set it as the default route.
457 * Note that pxeboot will set a default route of 0 if the route
458 * is not set by the DHCP server. Check also for a value of 0
459 * to avoid panicking inappropriately in that situation.
461 if (nd->mygateway.sin_len != 0 &&
462 nd->mygateway.sin_addr.s_addr != 0) {
463 struct sockaddr_in mask, sin;
465 bzero((caddr_t)&mask, sizeof(mask));
467 sin.sin_family = AF_INET;
468 sin.sin_len = sizeof(sin);
469 /* XXX MRT use table 0 for this sort of thing */
470 CURVNET_SET(TD_TO_VNET(td));
471 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 (struct sockaddr *)&nd->mygateway,
473 (struct sockaddr *)&mask,
474 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
477 panic("nfs_mountroot: RTM_ADD: %d", error);
481 * Create the rootfs mount point.
483 nd->root_args.fh = nd->root_fh;
484 nd->root_args.fhsize = nd->root_fhsize;
485 l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 (l >> 24) & 0xff, (l >> 16) & 0xff,
488 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
489 printf("NFS ROOT: %s\n", buf);
490 nd->root_args.hostname = buf;
491 if ((error = nfs_mountdiskless(buf,
492 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
497 * This is not really an nfs issue, but it is much easier to
498 * set hostname here and then let the "/etc/rc.xxx" files
499 * mount the right /var based upon its preset value.
501 mtx_lock(&prison0.pr_mtx);
502 strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 sizeof(prison0.pr_hostname));
504 mtx_unlock(&prison0.pr_mtx);
505 inittodr(ntohl(nd->root_time));
510 * Internal version of mount system call for diskless setup.
/*
 * nfs_mountdiskless: mount the diskless root described by path and sin.
 *
 * path - "server:directory" string; the part after the first ':' is
 *        passed to mountnfs() as the directory path, and the whole
 *        string is passed as the host name.
 * sin  - server address; duplicated with sodupsockaddr() before being
 *        handed to mountnfs().
 * args - NFS mount arguments forwarded unchanged.
 * td   - calling thread; its credentials are used for the mount.
 * vpp  - forwarded to mountnfs().
 * mp   - mount structure being set up.
 *
 * No Kerberos names are supplied (NULL, 0), the default name-cache
 * timeouts are used and the last argument is 0.  A failure from
 * mountnfs() is reported on the console.
 * NOTE(review): the handling of a path without ':' and the return
 * statements are on lines missing from this listing.
 */
513 nfs_mountdiskless(char *path,
514 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515 struct vnode **vpp, struct mount *mp)
517 struct sockaddr *nam;
522 * Find the directory path in "path", which also has the server's
523 * name/ip address in it.
525 dirpath = strchr(path, ':');
/* Step past the ':' so dirpath/dirlen describe only the directory part. */
527 dirlen = strlen(++dirpath);
530 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
/*
 * nfs_sec_name: translate a security flavor name into mount flag bits.
 *
 * sec    - flavor name, compared exactly against "krb5", "krb5i" and
 *          "krb5p".
 * flagsp - flag word that the matching bits are OR-ed into:
 *          "krb5"  -> NFSMNT_KERB
 *          "krb5i" -> NFSMNT_KERB | NFSMNT_INTEGRITY
 *          "krb5p" -> NFSMNT_KERB | NFSMNT_PRIVACY
 *
 * In the visible lines any other name leaves *flagsp unchanged; bits
 * already set in *flagsp are never cleared.
 */
541 nfs_sec_name(char *sec, int *flagsp)
543 if (!strcmp(sec, "krb5"))
544 *flagsp |= NFSMNT_KERB;
545 else if (!strcmp(sec, "krb5i"))
546 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 else if (!strcmp(sec, "krb5p"))
548 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
552 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553 const char *hostname, struct ucred *cred, struct thread *td)
562 * Set read-only flag if requested; otherwise, clear it if this is
563 * an update. If this is not an update, then either the read-only
564 * flag is already clear, or this is a root mount and it was set
565 * intentionally at some previous point.
567 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
569 mp->mnt_flag |= MNT_RDONLY;
571 } else if (mp->mnt_flag & MNT_UPDATE) {
573 mp->mnt_flag &= ~MNT_RDONLY;
578 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 * no sense in that context. Also, set up appropriate retransmit
580 * and soft timeout behavior.
582 if (argp->sotype == SOCK_STREAM) {
583 nmp->nm_flag &= ~NFSMNT_NOCONN;
584 nmp->nm_timeo = NFS_MAXTIMEO;
585 if ((argp->flags & NFSMNT_NFSV4) != 0)
586 nmp->nm_retry = INT_MAX;
588 nmp->nm_retry = NFS_RETRANS_TCP;
591 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 argp->flags &= ~NFSMNT_RDIRPLUS;
594 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597 /* Re-bind if rsrvd port requested and wasn't on one */
598 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599 && (argp->flags & NFSMNT_RESVPORT);
600 /* Also re-bind if we're switching to/from a connected UDP socket */
601 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602 (argp->flags & NFSMNT_NOCONN));
604 /* Update flags atomically. Don't change the lock bits. */
605 nmp->nm_flag = argp->flags | nmp->nm_flag;
608 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610 if (nmp->nm_timeo < NFS_MINTIMEO)
611 nmp->nm_timeo = NFS_MINTIMEO;
612 else if (nmp->nm_timeo > NFS_MAXTIMEO)
613 nmp->nm_timeo = NFS_MAXTIMEO;
616 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617 nmp->nm_retry = argp->retrans;
618 if (nmp->nm_retry > NFS_MAXREXMIT)
619 nmp->nm_retry = NFS_MAXREXMIT;
622 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623 nmp->nm_wsize = argp->wsize;
625 * Clip at the power of 2 below the size. There is an
626 * issue (not isolated) that causes intermittent page
627 * faults if this is not done.
629 if (nmp->nm_wsize > NFS_FABLKSIZE)
630 nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
632 nmp->nm_wsize = NFS_FABLKSIZE;
635 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636 nmp->nm_rsize = argp->rsize;
638 * Clip at the power of 2 below the size. There is an
639 * issue (not isolated) that causes intermittent page
640 * faults if this is not done.
642 if (nmp->nm_rsize > NFS_FABLKSIZE)
643 nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
645 nmp->nm_rsize = NFS_FABLKSIZE;
648 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649 nmp->nm_readdirsize = argp->readdirsize;
652 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653 nmp->nm_acregmin = argp->acregmin;
655 nmp->nm_acregmin = NFS_MINATTRTIMO;
656 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657 nmp->nm_acregmax = argp->acregmax;
659 nmp->nm_acregmax = NFS_MAXATTRTIMO;
660 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661 nmp->nm_acdirmin = argp->acdirmin;
663 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665 nmp->nm_acdirmax = argp->acdirmax;
667 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669 nmp->nm_acdirmin = nmp->nm_acdirmax;
670 if (nmp->nm_acregmin > nmp->nm_acregmax)
671 nmp->nm_acregmin = nmp->nm_acregmax;
673 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674 if (argp->readahead <= NFS_MAXRAHEAD)
675 nmp->nm_readahead = argp->readahead;
677 nmp->nm_readahead = NFS_MAXRAHEAD;
679 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680 if (argp->wcommitsize < nmp->nm_wsize)
681 nmp->nm_wcommitsize = nmp->nm_wsize;
683 nmp->nm_wcommitsize = argp->wcommitsize;
686 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687 (nmp->nm_soproto != argp->proto));
689 if (nmp->nm_client != NULL && adjsock) {
690 int haslock = 0, error = 0;
692 if (nmp->nm_sotype == SOCK_STREAM) {
693 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
698 newnfs_disconnect(&nmp->nm_sockreq);
700 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701 nmp->nm_sotype = argp->sotype;
702 nmp->nm_soproto = argp->proto;
703 if (nmp->nm_sotype == SOCK_DGRAM)
704 while (newnfs_connect(nmp, &nmp->nm_sockreq,
706 printf("newnfs_args: retrying connect\n");
707 (void) nfs_catnap(PSOCK, 0, "newnfscon");
711 nmp->nm_sotype = argp->sotype;
712 nmp->nm_soproto = argp->proto;
715 if (hostname != NULL) {
716 strlcpy(nmp->nm_hostname, hostname,
717 sizeof(nmp->nm_hostname));
718 p = strchr(nmp->nm_hostname, ':');
/*
 * Mount option names accepted by this file system.  nfs_mount() passes
 * this table to vfs_filteropt() to check the new option list before it
 * parses individual options.
 * NOTE(review): nfs_mount() also looks up "noresvport", which does not
 * appear in the visible part of this table -- confirm whether it should.
 */
724 static const char *nfs_opts[] = { "from", "nfs_args",
725 "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
726 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
727 "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
728 "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
729 "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
730 "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
731 "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
732 "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
733 "pnfs", "wcommitsize",
740 * It seems a bit dumb to copyinstr() the host and path here and then
741 * bcopy() them in mountnfs(), but I wanted to detect errors before
742 * doing the sockargs() call because sockargs() allocates an mbuf and
743 * an error after that means that I have to release the mbuf.
747 nfs_mount(struct mount *mp)
749 struct nfs_args args = {
750 .version = NFS_ARGSVERSION,
752 .addrlen = sizeof (struct sockaddr_in),
753 .sotype = SOCK_STREAM,
757 .flags = NFSMNT_RESVPORT,
760 .readdirsize = NFS_READDIRSIZE,
762 .retrans = NFS_RETRANS,
763 .readahead = NFS_DEFRAHEAD,
764 .wcommitsize = 0, /* was: NQ_DEFLEASE */
766 .acregmin = NFS_MINATTRTIMO,
767 .acregmax = NFS_MAXATTRTIMO,
768 .acdirmin = NFS_MINDIRATTRTIMO,
769 .acdirmax = NFS_MAXDIRATTRTIMO,
771 int error = 0, ret, len;
772 struct sockaddr *nam = NULL;
776 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777 char *opt, *name, *secname;
778 int nametimeo = NFS_DEFAULT_NAMETIMEO;
779 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
781 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
784 has_nfs_args_opt = 0;
785 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
791 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792 error = nfs_mountroot(mp);
799 * The old mount_nfs program passed the struct nfs_args
800 * from userspace to kernel. The new mount_nfs program
801 * passes string options via nmount() from userspace to kernel
802 * and we populate the struct nfs_args in the kernel.
804 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
810 if (args.version != NFS_ARGSVERSION) {
811 error = EPROGMISMATCH;
814 has_nfs_args_opt = 1;
817 /* Handle the new style options. */
818 if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819 args.acdirmin = args.acdirmax =
820 args.acregmin = args.acregmax = 0;
821 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
824 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825 args.flags |= NFSMNT_NOCONN;
826 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827 args.flags &= ~NFSMNT_NOCONN;
828 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829 args.flags |= NFSMNT_NOLOCKD;
830 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831 args.flags &= ~NFSMNT_NOLOCKD;
832 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833 args.flags |= NFSMNT_INT;
834 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835 args.flags |= NFSMNT_RDIRPLUS;
836 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837 args.flags |= NFSMNT_RESVPORT;
838 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839 args.flags &= ~NFSMNT_RESVPORT;
840 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841 args.flags |= NFSMNT_SOFT;
842 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843 args.flags &= ~NFSMNT_SOFT;
844 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845 args.sotype = SOCK_DGRAM;
846 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847 args.sotype = SOCK_DGRAM;
848 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849 args.sotype = SOCK_STREAM;
850 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851 args.flags |= NFSMNT_NFSV3;
852 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853 args.flags |= NFSMNT_NFSV4;
854 args.sotype = SOCK_STREAM;
856 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857 args.flags |= NFSMNT_ALLGSSNAME;
858 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859 args.flags |= NFSMNT_NOCTO;
860 if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861 args.flags |= NFSMNT_NONCONTIGWR;
862 if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863 args.flags |= NFSMNT_PNFS;
864 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
866 vfs_mount_error(mp, "illegal readdirsize");
870 ret = sscanf(opt, "%d", &args.readdirsize);
871 if (ret != 1 || args.readdirsize <= 0) {
872 vfs_mount_error(mp, "illegal readdirsize: %s",
877 args.flags |= NFSMNT_READDIRSIZE;
879 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
881 vfs_mount_error(mp, "illegal readahead");
885 ret = sscanf(opt, "%d", &args.readahead);
886 if (ret != 1 || args.readahead <= 0) {
887 vfs_mount_error(mp, "illegal readahead: %s",
892 args.flags |= NFSMNT_READAHEAD;
894 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
896 vfs_mount_error(mp, "illegal wsize");
900 ret = sscanf(opt, "%d", &args.wsize);
901 if (ret != 1 || args.wsize <= 0) {
902 vfs_mount_error(mp, "illegal wsize: %s",
907 args.flags |= NFSMNT_WSIZE;
/*
 * "rsize" option: the value must parse as a positive decimal integer.
 * On success it is stored in args.rsize and NFSMNT_RSIZE is set.  The
 * parse-failure message names rsize (it previously said "wsize", copied
 * from the option handled just above).
 */
909 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
911 vfs_mount_error(mp, "illegal rsize");
915 ret = sscanf(opt, "%d", &args.rsize);
916 if (ret != 1 || args.rsize <= 0) {
917 vfs_mount_error(mp, "illegal rsize: %s",
922 args.flags |= NFSMNT_RSIZE;
924 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
926 vfs_mount_error(mp, "illegal retrans");
930 ret = sscanf(opt, "%d", &args.retrans);
931 if (ret != 1 || args.retrans <= 0) {
932 vfs_mount_error(mp, "illegal retrans: %s",
937 args.flags |= NFSMNT_RETRANS;
939 if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940 ret = sscanf(opt, "%d", &args.acregmin);
941 if (ret != 1 || args.acregmin < 0) {
942 vfs_mount_error(mp, "illegal actimeo: %s",
947 args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
951 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952 ret = sscanf(opt, "%d", &args.acregmin);
953 if (ret != 1 || args.acregmin < 0) {
954 vfs_mount_error(mp, "illegal acregmin: %s",
959 args.flags |= NFSMNT_ACREGMIN;
961 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962 ret = sscanf(opt, "%d", &args.acregmax);
963 if (ret != 1 || args.acregmax < 0) {
964 vfs_mount_error(mp, "illegal acregmax: %s",
969 args.flags |= NFSMNT_ACREGMAX;
971 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972 ret = sscanf(opt, "%d", &args.acdirmin);
973 if (ret != 1 || args.acdirmin < 0) {
974 vfs_mount_error(mp, "illegal acdirmin: %s",
979 args.flags |= NFSMNT_ACDIRMIN;
981 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982 ret = sscanf(opt, "%d", &args.acdirmax);
983 if (ret != 1 || args.acdirmax < 0) {
984 vfs_mount_error(mp, "illegal acdirmax: %s",
989 args.flags |= NFSMNT_ACDIRMAX;
991 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992 ret = sscanf(opt, "%d", &args.wcommitsize);
993 if (ret != 1 || args.wcommitsize < 0) {
994 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
998 args.flags |= NFSMNT_WCOMMITSIZE;
1000 if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001 ret = sscanf(opt, "%d", &args.timeo);
1002 if (ret != 1 || args.timeo <= 0) {
1003 vfs_mount_error(mp, "illegal timeo: %s",
1008 args.flags |= NFSMNT_TIMEO;
1010 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011 ret = sscanf(opt, "%d", &args.timeo);
1012 if (ret != 1 || args.timeo <= 0) {
1013 vfs_mount_error(mp, "illegal timeout: %s",
1018 args.flags |= NFSMNT_TIMEO;
1020 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021 ret = sscanf(opt, "%d", &nametimeo);
1022 if (ret != 1 || nametimeo < 0) {
1023 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1028 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1030 ret = sscanf(opt, "%d", &negnametimeo);
1031 if (ret != 1 || negnametimeo < 0) {
1032 vfs_mount_error(mp, "illegal negnametimeo: %s",
1038 if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1040 ret = sscanf(opt, "%d", &minvers);
1041 if (ret != 1 || minvers < 0 || minvers > 1 ||
1042 (args.flags & NFSMNT_NFSV4) == 0) {
1043 vfs_mount_error(mp, "illegal minorversion: %s", opt);
1048 if (vfs_getopt(mp->mnt_optnew, "sec",
1049 (void **) &secname, NULL) == 0)
1050 nfs_sec_name(secname, &args.flags);
1052 if (mp->mnt_flag & MNT_UPDATE) {
1053 struct nfsmount *nmp = VFSTONFS(mp);
1061 * If a change from TCP->UDP is done and there are thread(s)
1062 * that have I/O RPC(s) in progress with a transfer size
1063 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064 * hung, retrying the RPC(s) forever. Usually these threads
1065 * will be seen doing an uninterruptible sleep on wait channel
1066 * "newnfsreq" (truncated to "newnfsre" by procstat).
1068 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069 tprintf(td->td_proc, LOG_WARNING,
1070 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1073 * When doing an update, we can't change version,
1074 * security, switch lockd strategies or change cookie
1077 args.flags = (args.flags &
1083 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1090 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1096 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097 * or no-connection mode for those protocols that support
1098 * no-connection mode (the flag will be cleared later for protocols
1099 * that do not support no-connection mode). This will allow a client
1100 * to receive replies from a different IP than the request was
1101 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1104 if (nfs_ip_paranoia == 0)
1105 args.flags |= NFSMNT_NOCONN;
1107 if (has_nfs_args_opt != 0) {
1109 * In the 'nfs_args' case, the pointers in the args
1110 * structure are in userland - we copy them in here.
1112 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113 vfs_mount_error(mp, "Bad file handle");
1117 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1121 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1124 bzero(&hst[hstlen], MNAMELEN - hstlen);
1125 args.hostname = hst;
1126 /* sockargs() call must be after above copyin() calls */
1127 error = getsockaddr(&nam, (caddr_t)args.addr,
1132 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133 &args.fhsize) == 0) {
1134 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135 vfs_mount_error(mp, "Bad file handle");
1139 bcopy(args.fh, nfh, args.fhsize);
1143 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1144 (void **)&args.hostname, &len);
1145 if (args.hostname == NULL) {
1146 vfs_mount_error(mp, "Invalid hostname");
1150 bcopy(args.hostname, hst, MNAMELEN);
1151 hst[MNAMELEN - 1] = '\0';
1154 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1157 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158 srvkrbnamelen = strlen(srvkrbname);
1160 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1161 strlcpy(krbname, name, sizeof (krbname));
1164 krbnamelen = strlen(krbname);
1166 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1167 strlcpy(dirpath, name, sizeof (dirpath));
1170 dirlen = strlen(dirpath);
1172 if (has_nfs_args_opt == 0) {
1173 if (vfs_getopt(mp->mnt_optnew, "addr",
1174 (void **)&args.addr, &args.addrlen) == 0) {
1175 if (args.addrlen > SOCK_MAXADDRLEN) {
1176 error = ENAMETOOLONG;
1179 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1180 bcopy(args.addr, nam, args.addrlen);
1181 nam->sa_len = args.addrlen;
1183 vfs_mount_error(mp, "No server address");
1190 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1191 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1192 nametimeo, negnametimeo, minvers);
1196 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1208 * It seems a bit dumb to copyinstr() the host and path here and then
1209 * bcopy() them in mountnfs(), but I wanted to detect errors before
1210 * doing the sockargs() call because sockargs() allocates an mbuf and
1211 * an error after that means that I have to release the mbuf.
/*
 * nfs_cmount: wrap a struct nfs_args into a mount argument list.
 *
 * Visible steps: copy the nfs_args structure addressed by data into a
 * local with copyin(), append that local to the argument list ma under
 * the option name nfs_args using mount_arg(), then pass the list and
 * flags to kernel_mount().
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps),
 * so the return type, the declaration of error, any check of the
 * copyin() result and the final return are not visible here - confirm
 * against the complete file.
 */
1215 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1218 struct nfs_args args;
/* Bring the argument structure into a local copy. */
1220 error = copyin(data, &args, sizeof (struct nfs_args));
/* Attach the local copy to the argument list as a single option. */
1224 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
/* Hand the assembled argument list to kernel_mount(). */
1226 error = kernel_mount(ma, flags);
1231 * Common code for mount and mountroot
/*
 * mountnfs: allocate and initialize the struct nfsmount for an NFS mount,
 * connect to the server and set up the root vnode.
 *
 * Parameters, as used by the visible code:
 *   argp       - mount arguments: flags select the NFS version, fh and
 *                fhsize give the root file handle, sotype and proto the
 *                transport.
 *   mp         - the mount being set up (stored in nm_mountp).
 *   nam        - server socket address; it is released with
 *                FREE(nam, M_SONAME) on the MNT_UPDATE path and in the
 *                teardown statements at the end of the function.
 *   hst        - host string; MNAMELEN bytes are copied into
 *                mp->mnt_stat.f_mntfromname and it is passed to
 *                nfs_decode_args().
 *   krbname, dirpath, srvkrbname and their lengths
 *              - copied into the tail of the nfsmount allocation.
 *   vpp        - vnode pointer for the mount root, used by
 *                nfscl_loadattrcache(), ncl_fsinfo() and NFSVOPUNLOCK().
 *   cred, td   - credential and thread passed to the RPC helpers.
 *   nametimeo, negnametimeo
 *              - defaults stored before nfs_decode_args() is called.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps).
 * The final parameter line (minvers is referenced below), several local
 * declarations (np, lease), labels, goto and return statements, else
 * lines and closing braces are not visible, so control flow between the
 * visible statements must be confirmed against the complete file.
 */
1234 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1235 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1236 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1237 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1240 struct nfsmount *nmp;
1242 int error, trycnt, ret;
1243 struct nfsvattr nfsva;
1244 struct nfsclclient *clp;
1245 struct nfsclds *dsp, *tdsp;
1247 static u_int64_t clval = 0;
1249 NFSCL_DEBUG(3, "in mnt\n");
/* Updates of an existing mount are not processed here: log and drop nam. */
1251 if (mp->mnt_flag & MNT_UPDATE) {
1253 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1254 FREE(nam, M_SONAME);
/*
 * A single zeroed allocation holds the nfsmount followed by the three
 * name strings. The + 2 presumably accounts for string terminators
 * beyond what the structure itself provides - TODO confirm.
 */
1257 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1258 krbnamelen + dirlen + srvkrbnamelen + 2,
1259 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1260 TAILQ_INIT(&nmp->nm_bufq);
/*
 * clval is a function-static counter; each mount takes the current value
 * and post-increments it. The condition guarding the seeding from
 * nfsboottime is not visible in this extract.
 */
1262 clval = (u_int64_t)nfsboottime.tv_sec;
1263 nmp->nm_clval = clval++;
1264 nmp->nm_krbnamelen = krbnamelen;
1265 nmp->nm_dirpathlen = dirlen;
1266 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1267 if (td->td_ucred->cr_uid != (uid_t)0) {
1269 * nm_uid is used to get KerberosV credentials for
1270 * the nfsv4 state handling operations if there is
1271 * no host based principal set. Use the uid of
1272 * this user if not root, since they are doing the
1273 * mount. I don't think setting this for root will
1274 * work, since root normally does not have user
1275 * credentials in a credentials cache.
1277 nmp->nm_uid = td->td_ucred->cr_uid;
1280 * Just set to -1, so it won't be used.
1282 nmp->nm_uid = (uid_t)-1;
1285 /* Copy and null terminate all the names */
1286 if (nmp->nm_krbnamelen > 0) {
1287 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1288 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1290 if (nmp->nm_dirpathlen > 0) {
1291 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1292 nmp->nm_dirpathlen);
1293 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1296 if (nmp->nm_srvkrbnamelen > 0) {
1297 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1298 nmp->nm_srvkrbnamelen);
1299 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1300 + nmp->nm_srvkrbnamelen + 2] = '\0';
/* The socket request keeps its own reference on cred and its own mutex. */
1302 nmp->nm_sockreq.nr_cred = crhold(cred);
1303 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1305 nmp->nm_getinfo = nfs_getnlminfo;
1306 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1309 nmp->nm_mountp = mp;
1310 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1313 * Since nfs_decode_args() might optionally set them, these
1314 * need to be set to defaults before the call, so that the
1315 * optional settings aren't overwritten.
1317 nmp->nm_nametimeo = nametimeo;
1318 nmp->nm_negnametimeo = negnametimeo;
1319 nmp->nm_timeo = NFS_TIMEO;
1320 nmp->nm_retry = NFS_RETRANS;
1321 nmp->nm_readahead = NFS_DEFRAHEAD;
/* The write commit size default is derived from hibufspace. */
1322 if (desiredvnodes >= 11000)
1323 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1325 nmp->nm_wcommitsize = hibufspace / 10;
1326 if ((argp->flags & NFSMNT_NFSV4) != 0)
1327 nmp->nm_minorvers = minvers;
1329 nmp->nm_minorvers = 0;
1331 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1334 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1335 * high, depending on whether we end up with negative offsets in
1336 * the client or server somewhere. 2GB-1 may be safer.
1338 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1339 * that we can handle until we find out otherwise.
1341 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1342 nmp->nm_maxfilesize = 0xffffffffLL;
1344 nmp->nm_maxfilesize = OFF_MAX;
/* With neither the V3 nor the V4 flag set, fixed transfer sizes are used. */
1346 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1347 nmp->nm_wsize = NFS_WSIZE;
1348 nmp->nm_rsize = NFS_RSIZE;
1349 nmp->nm_readdirsize = NFS_READDIRSIZE;
1351 nmp->nm_numgrps = NFS_MAXGRPS;
/* Negative tprintf delays are clamped to zero. */
1352 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1353 if (nmp->nm_tprintf_delay < 0)
1354 nmp->nm_tprintf_delay = 0;
1355 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1356 if (nmp->nm_tprintf_initial_delay < 0)
1357 nmp->nm_tprintf_initial_delay = 0;
/* Record the root file handle, if one was supplied in argp. */
1358 nmp->nm_fhsize = argp->fhsize;
1359 if (nmp->nm_fhsize > 0)
1360 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1361 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1363 /* Set up the sockets and per-host congestion */
1364 nmp->nm_sotype = argp->sotype;
1365 nmp->nm_soproto = argp->proto;
1366 nmp->nm_sockreq.nr_prog = NFS_PROG;
/* The RPC version follows the version flags; no flag selects NFS_VER2. */
1367 if ((argp->flags & NFSMNT_NFSV4))
1368 nmp->nm_sockreq.nr_vers = NFS_VER4;
1369 else if ((argp->flags & NFSMNT_NFSV3))
1370 nmp->nm_sockreq.nr_vers = NFS_VER3;
1372 nmp->nm_sockreq.nr_vers = NFS_VER2;
/* NOTE(review): the statement taken when the connect fails is not visible. */
1375 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1377 /* For NFSv4.1, get the clientid now. */
1378 if (nmp->nm_minorvers > 0) {
1379 NFSCL_DEBUG(3, "at getcl\n");
1380 error = nfscl_getcl(mp, cred, td, 0, &clp);
1381 NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1386 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1387 nmp->nm_dirpathlen > 0) {
1388 NFSCL_DEBUG(3, "in dirp\n");
1390 * If the fhsize on the mount point == 0 for V4, the mount
1391 * path needs to be looked up.
/*
 * The lookup is retried, napping between attempts, while it keeps failing
 * and trycnt has not run out. The initial value of trycnt is not visible.
 */
1395 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1397 NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1399 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1400 } while (error && --trycnt > 0);
1402 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1408 * A reference count is needed on the nfsnode representing the
1409 * remote root. If this object is not persistent, then backward
1410 * traversals of the mount point (i.e. "..") will not work if
1411 * the nfsnode gets flushed out of the cache. Ufs does not have
1412 * this problem, because one can identify root inodes by their
1413 * number == ROOTINO (2).
1415 if (nmp->nm_fhsize > 0) {
1417 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1418 * non-zero for the root vnode. f_iosize will be set correctly
1419 * by nfs_statfs() before any I/O occurs.
1421 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1422 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1429 * Get file attributes and transfer parameters for the
1430 * mountpoint. This has the side effect of filling in
1431 * (*vpp)->v_type with the correct value.
1433 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1434 cred, td, &nfsva, NULL, &lease);
/*
 * NOTE(review): the test on ret guarding the fallback attributes below is
 * not visible; presumably they apply only when the getattr fails - confirm.
 */
1437 * Just set default values to get things going.
1439 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1440 nfsva.na_vattr.va_type = VDIR;
1441 nfsva.na_vattr.va_mode = 0777;
1442 nfsva.na_vattr.va_nlink = 100;
1443 nfsva.na_vattr.va_uid = (uid_t)0;
1444 nfsva.na_vattr.va_gid = (gid_t)0;
1445 nfsva.na_vattr.va_fileid = 2;
1446 nfsva.na_vattr.va_gen = 1;
1447 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1448 nfsva.na_vattr.va_size = 512 * 1024;
1451 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
/* For minor version > 0, set the renew and expiry times from lease. */
1452 if (nmp->nm_minorvers > 0) {
1453 NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1455 clp->nfsc_renew = NFSCL_RENEW(lease);
1456 clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
/* Bump the clientid revision, skipping over zero if it wraps. */
1457 clp->nfsc_clientidrev++;
1458 if (clp->nfsc_clientidrev == 0)
1459 clp->nfsc_clientidrev++;
1462 * Mount will succeed, so the renew thread can be
1465 nfscl_start_renewthread(clp);
1466 nfscl_clientrelease(clp);
1468 if (argp->flags & NFSMNT_NFSV3)
1469 ncl_fsinfo(nmp, *vpp, cred, td);
1471 /* Mark if the mount point supports NFSv4 ACLs. */
1472 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1474 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1476 mp->mnt_flag |= MNT_NFS4ACLS;
1481 * Lose the lock but keep the ref.
1483 NFSVOPUNLOCK(*vpp, 0);
/*
 * NOTE(review): the statements from here on release everything set up
 * above (client, connection, credential, auth handle, mutexes, nm_clp,
 * sessions, nmp and nam). Presumably this is the failure exit reached via
 * a label that is not visible in this extract - confirm.
 */
1490 nfscl_clientrelease(clp);
1491 newnfs_disconnect(&nmp->nm_sockreq);
1492 crfree(nmp->nm_sockreq.nr_cred);
1493 if (nmp->nm_sockreq.nr_auth != NULL)
1494 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1495 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1496 mtx_destroy(&nmp->nm_mtx);
1497 if (nmp->nm_clp != NULL) {
1499 LIST_REMOVE(nmp->nm_clp, nfsc_list);
1501 free(nmp->nm_clp, M_NFSCLCLIENT);
1503 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1504 nfscl_freenfsclds(dsp);
1505 FREE(nmp, M_NEWNFSMNT);
1506 FREE(nam, M_SONAME);
1511 * unmount system call
/*
 * nfs_unmount: tear down an NFS mount.
 *
 * mp is the mount; mntflags carries MNT_FORCE for a forced unmount.
 *
 * Visible sequence: for MNT_FORCE, set FORCECLOSE, cancel outstanding
 * requests and call nfscl_umount() early; flush vnodes with vflush()
 * (retried with a nap, fewer than 30 tries, only in the forced case);
 * call nfscl_umount() for the non-forced case; detach any nfsiod slots
 * pointing at this mount; disconnect; then release the credential, the
 * server address, the auth handle, both mutexes, the session list and
 * the nfsmount itself.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the assignment of nmp and td, the error exits and the return are not
 * visible - confirm against the complete file.
 */
1514 nfs_unmount(struct mount *mp, int mntflags)
1517 struct nfsmount *nmp;
1518 int error, flags = 0, i, trycnt = 0;
1519 struct nfsclds *dsp, *tdsp;
1523 if (mntflags & MNT_FORCE)
1524 flags |= FORCECLOSE;
1527 * Goes something like this..
1528 * - Call vflush() to clear out vnodes for this filesystem
1529 * - Close the socket
1530 * - Free up the data structures
1532 /* In the forced case, cancel any outstanding requests. */
1533 if (mntflags & MNT_FORCE) {
1534 error = newnfs_nmcancelreqs(nmp);
1537 /* For a forced close, get rid of the renew thread now */
1538 nfscl_umount(nmp, td);
1540 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
/* The second vflush() argument of 1 matches that extra root reference. */
1542 error = vflush(mp, 1, flags, td);
1543 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1544 (void) nfs_catnap(PSOCK, error, "newndm");
1545 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1550 * We are now committed to the unmount.
1552 if ((mntflags & MNT_FORCE) == 0)
1553 nfscl_umount(nmp, td);
1554 /* Make sure no nfsiods are assigned to this mount. */
1555 mtx_lock(&ncl_iod_mutex);
1556 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1557 if (ncl_iodmount[i] == nmp) {
1558 ncl_iodwant[i] = NFSIOD_AVAILABLE;
1559 ncl_iodmount[i] = NULL;
1561 mtx_unlock(&ncl_iod_mutex);
/* Release the resources held by the nfsmount, then the nfsmount itself. */
1562 newnfs_disconnect(&nmp->nm_sockreq);
1563 crfree(nmp->nm_sockreq.nr_cred);
1564 FREE(nmp->nm_nam, M_SONAME);
1565 if (nmp->nm_sockreq.nr_auth != NULL)
1566 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1567 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1568 mtx_destroy(&nmp->nm_mtx);
1569 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1570 nfscl_freenfsclds(dsp);
1571 FREE(nmp, M_NEWNFSMNT);
1577 * Return root of a filesystem
/*
 * nfs_root: obtain the root vnode of the mount.
 *
 * Looks up the nfsnode for the stored root file handle (nm_fh, nm_fhsize)
 * with ncl_nget(), passing flags through. For an NFSv3 mount that has not
 * yet fetched fsinfo, ncl_fsinfo() is called after dropping nm_mtx. The
 * vnode is then flagged VV_ROOT.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the assignments of nmp and vp, the error check after ncl_nget(), the
 * statement executed when v_type is VNON, the store through vpp and the
 * return are not visible - confirm against the complete file.
 */
1580 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1583 struct nfsmount *nmp;
1588 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1593 * Get transfer parameters and attributes for root vnode once.
/* nm_mtx is held only for the flag test; it is dropped on both paths. */
1595 mtx_lock(&nmp->nm_mtx);
1596 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1597 mtx_unlock(&nmp->nm_mtx);
1598 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1600 mtx_unlock(&nmp->nm_mtx);
1601 if (vp->v_type == VNON)
1603 vp->v_vflag |= VV_ROOT;
1609 * Flush out the buffer cache
/*
 * nfs_sync: flush dirty buffers for the vnodes of this mount.
 *
 * mp is the mount; waitfor is compared against MNT_LAZY and is also
 * passed to VOP_FSYNC().
 *
 * Visible behavior: a forced unmount in progress (MNTK_UNMOUNTF) is
 * special-cased first. Otherwise every vnode is visited with
 * MNT_VNODE_FOREACH_ALL; vnodes that are locked, have no dirty buffers,
 * or are visited with waitfor == MNT_LAZY fail the filter; the rest are
 * acquired with vget(), synced with VOP_FSYNC() and unlocked.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the bodies of the early exit and of the skip branch, the restart after
 * a vget() failure, the accumulation into allerror and the return are not
 * visible - confirm against the complete file.
 */
1613 nfs_sync(struct mount *mp, int waitfor)
1615 struct vnode *vp, *mvp;
1617 int error, allerror = 0;
1623 * If a forced dismount is in progress, return from here so that
1624 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1625 * calling VFS_UNMOUNT().
1627 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1634 * Force stale buffer cache information to be flushed.
1637 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1638 /* XXX Racy bv_cnt check. */
1639 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1640 waitfor == MNT_LAZY) {
1644 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1645 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1648 error = VOP_FSYNC(vp, waitfor, td);
1651 NFSVOPUNLOCK(vp, 0);
/*
 * nfs_sysctl: per-mount sysctl handler, dispatching on op.
 *
 * mp is the mount, op selects the operation, req carries the old and new
 * value pointers used with SYSCTL_OUT() and SYSCTL_IN().
 *
 * Visible operations:
 *   - VFS_CTL_NOLOCKS: report and optionally change NFSMNT_NOLOCKS.
 *   - a status query that fills vq.vq_flags with VQ_NOTRESP and
 *     VQ_NOTRESPLOCK and copies vq out.
 *   - a read and privileged write of nm_tprintf_initial_delay, with
 *     negative values clamped to zero.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * only one case label is visible, and the switch statement, the other
 * labels, error checks, break statements and the return are missing -
 * confirm against the complete file.
 */
1658 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1660 struct nfsmount *nmp = VFSTONFS(mp);
1664 bzero(&vq, sizeof(vq));
1667 case VFS_CTL_NOLOCKS:
/* The current setting is reported as 1 or 0. */
1668 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1669 if (req->oldptr != NULL) {
1670 error = SYSCTL_OUT(req, &val, sizeof(val));
1674 if (req->newptr != NULL) {
1675 error = SYSCTL_IN(req, &val, sizeof(val));
/* NOTE(review): the test on val choosing between set and clear is not visible. */
1679 nmp->nm_flag |= NFSMNT_NOLOCKS;
1681 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
/* NOTE(review): presumably a separate case starts here; its label is not visible. */
1686 mtx_lock(&nmp->nm_mtx);
1687 if (nmp->nm_state & NFSSTA_TIMEO)
1688 vq.vq_flags |= VQ_NOTRESP;
1689 mtx_unlock(&nmp->nm_mtx);
1691 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1692 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1693 vq.vq_flags |= VQ_NOTRESPLOCK;
1695 error = SYSCTL_OUT(req, &vq, sizeof(vq));
/* NOTE(review): presumably another case starts here; its label is not visible. */
1698 if (req->oldptr != NULL) {
1699 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1700 sizeof(nmp->nm_tprintf_initial_delay));
1704 if (req->newptr != NULL) {
/* vfs_suser() is called before the new value is read in. */
1705 error = vfs_suser(mp, req->td);
1708 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1709 sizeof(nmp->nm_tprintf_initial_delay));
1712 if (nmp->nm_tprintf_initial_delay < 0)
1713 nmp->nm_tprintf_initial_delay = 0;
1723 * Purge any RPCs in progress, so that they will all return errors.
1724 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
/*
 * nfs_purge: cancel the outstanding requests of the mount by calling
 * newnfs_nmcancelreqs() on the nfsmount obtained from mp via VFSTONFS().
 * The result of newnfs_nmcancelreqs() is not used.
 *
 * NOTE(review): the return type line and the braces are not visible in
 * this extract.
 */
1728 nfs_purge(struct mount *mp)
1730 struct nfsmount *nmp = VFSTONFS(mp);
1732 newnfs_nmcancelreqs(nmp);
1736 * Extract the information needed by the nlm from the nfs vnode.
/*
 * nfs_getnlminfo: copy lock-manager related details of vp into the
 * caller-provided output locations.
 *
 *   fhp, fhlenp - receive the file handle bytes and length from n_fhp.
 *   sp          - receives the server address nm_nam; the copy is bounded
 *                 by the smaller of sa_len and sizeof(*sp).
 *   is_v3p      - receives NFS_ISV3(vp).
 *   sizep       - receives the cached size n_size.
 *   timeop      - when not NULL, receives nm_timeo converted from NFS_HZ
 *                 units to seconds and microseconds.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * any NULL checks guarding the other outputs are not visible, so only the
 * timeop check can be confirmed from here.
 */
1739 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1740 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1741 struct timeval *timeop)
1743 struct nfsmount *nmp;
1744 struct nfsnode *np = VTONFS(vp);
1746 nmp = VFSTONFS(vp->v_mount);
1748 *fhlenp = (size_t)np->n_fhp->nfh_len;
1750 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1752 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1754 *is_v3p = NFS_ISV3(vp);
1756 *sizep = np->n_size;
1757 if (timeop != NULL) {
/* Whole seconds first, then the remainder scaled to microseconds. */
1758 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1759 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1764 * This function prints out an option name, based on the conditional
/*
 * nfscl_printopt: write the option string opt at *buf when testval is
 * nonzero and the remaining space *blen is larger than strlen(opt).
 *
 * buf and blen are passed by reference. nmp is not used by the visible
 * lines.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the action taken when the snprintf() result differs from strlen(opt)
 * and any update of *buf and *blen are not visible - confirm against the
 * complete file.
 */
1767 static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
1768 char *opt, char **buf, size_t *blen)
1772 if (testval != 0 && *blen > strlen(opt)) {
1773 len = snprintf(*buf, *blen, "%s", opt);
/* A result different from strlen(opt) is treated as a special case. */
1774 if (len != strlen(opt))
1782 * This function prints out the integer value of an option.
/*
 * nfscl_printoptval: write opt followed by = and the decimal value optval
 * at *buf, provided the remaining space *blen is larger than
 * strlen(opt) + 1.
 *
 * The space check does not account for the digits of optval, so the
 * output can be truncated by snprintf(), as the existing comment notes.
 * nmp is not used by the visible lines.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the handling of the snprintf() result and any update of *buf and *blen
 * are not visible - confirm against the complete file.
 */
1784 static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
1785 char *opt, char **buf, size_t *blen)
1789 if (*blen > strlen(opt) + 1) {
1790 /* Could result in truncated output string. */
1791 len = snprintf(*buf, *blen, "%s=%d", opt, optval);
1800 * Load the option flags and values into the buffer.
/*
 * nfscl_retopts: describe the options of the mount nmp as text, using
 * nfscl_printopt() for flag style options and nfscl_printoptval() for
 * options that carry an integer value.
 *
 * Order of the visible calls: NFS version (with minorversion and pnfs for
 * NFSv4), transport, resvport, noconn, hard or soft, intr, cto or nocto,
 * noncontigwr, lockd or nolockd, rdirplus, the sec= flavor, then the
 * attribute cache, name cache, transfer size, readahead, wcommitsize,
 * timeout and retrans values. Every option after the version starts with
 * a comma.
 *
 * NOTE(review): this extract skips lines (the embedded numbering jumps);
 * the initialization of buf and blen from buffer and buflen, several
 * argument continuation lines and the closing brace are not visible -
 * confirm against the complete file.
 */
1802 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1809 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1811 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1812 nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1814 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1817 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1819 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1820 "nfsv2", &buf, &blen);
1821 nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1822 nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1823 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1825 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1827 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1829 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1831 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1833 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1835 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1837 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1838 ",noncontigwr", &buf, &blen);
1839 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1840 0, ",lockd", &buf, &blen);
1841 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1842 NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1843 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1845 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1847 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1848 NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1849 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1850 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1852 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1853 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
/* From here on every option is printed with its integer value. */
1855 nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1856 nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1857 nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1858 nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1859 nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1860 nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1862 nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1863 nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1864 nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1866 nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1867 nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1869 nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1870 nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);