2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
47 #include <sys/clock.h>
49 #include <sys/limits.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
64 #include <vm/vm_extern.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
/*
 * File-scope state for the NFS client VFS layer: the nfscl kernel feature
 * string, extern declarations for objects defined outside this file, the
 * two malloc types used for request headers and mount structures, and the
 * tunables exported under the _vfs_nfs sysctl node.
 *
 * nfs_ip_paranoia starts at 1; nfs_mount() adds NFSMNT_NOCONN to the mount
 * flags when it has been set to 0.  nfs_tprintf_initial_delay and
 * nfs_tprintf_delay start at NFS_TPRINTF_INITIAL_DELAY and
 * NFS_TPRINTF_DELAY respectively.
 *
 * NOTE(review): the two delay sysctls pass the NFS_TPRINTF_* delay constant
 * in the position where nfs_ip_paranoia passes OID_AUTO - confirm that
 * using the delay value there is intended.
 */
77 FEATURE(nfscl, "NFSv4 client");
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
92 SYSCTL_DECL(_vfs_nfs);
93 static int nfs_ip_paranoia = 1;
94 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95 &nfs_ip_paranoia, 0, "");
96 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99 /* how long between console messages "nfs server foo not responding" */
100 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
/*
 * Forward declarations for the file-local helpers (nfs_mountroot,
 * nfs_sec_name, nfs_decode_args, mountnfs, nfs_getnlminfo) and for the
 * vfs operation implementations that the nfs_vfsops table points at.
 */
104 static int nfs_mountroot(struct mount *);
105 static void nfs_sec_name(char *, int *);
106 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107 struct nfs_args *argp, const char *, struct ucred *,
109 static int mountnfs(struct nfs_args *, struct mount *,
110 struct sockaddr *, char *, u_char *, int, u_char *, int,
111 u_char *, int, struct vnode **, struct ucred *,
112 struct thread *, int, int, int);
113 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114 struct sockaddr_storage *, int *, off_t *,
116 static vfs_mount_t nfs_mount;
117 static vfs_cmount_t nfs_cmount;
118 static vfs_unmount_t nfs_unmount;
119 static vfs_root_t nfs_root;
120 static vfs_statfs_t nfs_statfs;
121 static vfs_sync_t nfs_sync;
122 static vfs_sysctl_t nfs_sysctl;
123 static vfs_purge_t nfs_purge;
126 * nfs vfs operations.
/*
 * Operations vector for the NFS client file system.  Init and uninit are
 * provided by ncl_init and ncl_uninit; every other slot points at a static
 * nfs_* function declared in this file.  The table is handed to VFS_SET
 * under the name nfs.
 */
128 static struct vfsops nfs_vfsops = {
129 .vfs_init = ncl_init,
130 .vfs_mount = nfs_mount,
131 .vfs_cmount = nfs_cmount,
132 .vfs_root = nfs_root,
133 .vfs_statfs = nfs_statfs,
134 .vfs_sync = nfs_sync,
135 .vfs_uninit = ncl_uninit,
136 .vfs_unmount = nfs_unmount,
137 .vfs_sysctl = nfs_sysctl,
138 .vfs_purge = nfs_purge,
/*
 * Register nfs_vfsops as the file system named nfs with the VFCF_NETWORK
 * and VFCF_SBDRY flags, declare module version 1, and declare dependencies
 * on the nfscommon, krpc, nfssvc and nfslock modules (each at version 1).
 */
140 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
142 /* So that loader and kldload(2) can find us, wherever we are.. */
143 MODULE_VERSION(nfs, 1);
144 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
/*
 * Diskless-boot state and its read-only sysctl views.
 *
 * When neither NFS_ROOT nor NFSCLIENT is defined, zero-initialised
 * definitions of nfs_diskless, nfsv3_diskless and nfs_diskless_valid are
 * provided here.  Three CTLFLAG_RD sysctls under _vfs_nfs expose
 * nfs_diskless_valid, nfsv3_diskless.root_hostnam and
 * nfsv3_diskless.root_saddr.
 *
 * The remaining lines are prototypes for newnfsargs_ntoh and for the static
 * helpers nfs_mountdiskless, nfs_convert_diskless and nfs_convert_oargs.
 */
155 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156 struct nfs_diskless nfs_diskless = { { { 0 } } };
157 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
158 int nfs_diskless_valid = 0;
161 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162 &nfs_diskless_valid, 0,
163 "Has the diskless struct been filled correctly");
165 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
168 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170 "%Ssockaddr_in", "Diskless root nfs address");
173 void newnfsargs_ntoh(struct nfs_args *);
174 static int nfs_mountdiskless(char *,
175 struct sockaddr_in *, struct nfs_args *,
176 struct thread *, struct vnode **, struct mount *);
177 static void nfs_convert_diskless(void);
178 static void nfs_convert_oargs(struct nfs_args *args,
179 struct onfs_args *oargs);
/*
 * Clamp the transfer sizes stored in the mount structure and derive the
 * buffer I/O size for the mount.
 *
 * An upper limit, maxio, is picked from NFS_MAXBSIZE, NFS_MAXDGRAMDATA or
 * NFS_V2MAXDATA according to the NFSMNT_NFSV4 and NFSMNT_NFSV3 bits in
 * nm_flag and, for NFSv3, whether nm_sotype is SOCK_DGRAM.
 *
 * nm_rsize, nm_wsize and nm_readdirsize are each replaced by maxio when
 * they are zero or larger than maxio.  nm_rsize and nm_wsize are further
 * capped at NFS_MAXBSIZE, and nm_readdirsize is capped at nm_rsize.
 *
 * iosize is the larger of nm_rsize and nm_wsize, raised to at least
 * PAGE_SIZE, and is written to nm_mountp->mnt_stat.f_iosize.  nfs_statfs()
 * also assigns the value of this function to f_iosize of its output buffer.
 *
 * nmp: mount structure whose size fields are adjusted in place.
 */
182 newnfs_iosize(struct nfsmount *nmp)
186 /* First, set the upper limit for iosize */
187 if (nmp->nm_flag & NFSMNT_NFSV4) {
188 maxio = NFS_MAXBSIZE;
189 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
190 if (nmp->nm_sotype == SOCK_DGRAM)
191 maxio = NFS_MAXDGRAMDATA;
193 maxio = NFS_MAXBSIZE;
195 maxio = NFS_V2MAXDATA;
197 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198 nmp->nm_rsize = maxio;
199 if (nmp->nm_rsize > NFS_MAXBSIZE)
200 nmp->nm_rsize = NFS_MAXBSIZE;
201 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202 nmp->nm_readdirsize = maxio;
203 if (nmp->nm_readdirsize > nmp->nm_rsize)
204 nmp->nm_readdirsize = nmp->nm_rsize;
205 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206 nmp->nm_wsize = maxio;
207 if (nmp->nm_wsize > NFS_MAXBSIZE)
208 nmp->nm_wsize = NFS_MAXBSIZE;
211 * Calculate the size used for io buffers. Use the larger
212 * of the two sizes to minimise nfs requests but make sure
213 * that it is at least one VM page to avoid wasting buffer
216 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217 iosize = imax(iosize, PAGE_SIZE);
218 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
/*
 * Populate a current-format nfs_args from an old-format onfs_args.
 *
 * args->version is set to NFS_ARGSVERSION; every other field listed below
 * is a plain assignment from the same-named member of oargs, so nothing is
 * duplicated or deep-copied.  Members of args that are not listed here are
 * left as the caller set them.
 *
 * args:  destination structure, written in place.
 * oargs: source structure, only read.
 */
223 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
226 args->version = NFS_ARGSVERSION;
227 args->addr = oargs->addr;
228 args->addrlen = oargs->addrlen;
229 args->sotype = oargs->sotype;
230 args->proto = oargs->proto;
231 args->fh = oargs->fh;
232 args->fhsize = oargs->fhsize;
233 args->flags = oargs->flags;
234 args->wsize = oargs->wsize;
235 args->rsize = oargs->rsize;
236 args->readdirsize = oargs->readdirsize;
237 args->timeo = oargs->timeo;
238 args->retrans = oargs->retrans;
239 args->readahead = oargs->readahead;
240 args->hostname = oargs->hostname;
/*
 * Convert the global nfs_diskless structure into the global nfsv3_diskless
 * structure and mark the result valid.
 *
 * The interface request, gateway address, root server address, root host
 * name, root time and local host name are copied across, and the root mount
 * arguments are converted with nfs_convert_oargs().  The root file handle
 * is copied with a length of NFSX_MYFH or NFSX_V2FH, selected by the
 * NFSMNT_NFSV3 bit of the converted root_args.flags, and root_fhsize is set
 * to the same length.  Finally nfs_diskless_valid is set to 3.
 *
 * nfs_mountroot() calls this when nfs_diskless_valid is 1.
 */
244 nfs_convert_diskless(void)
247 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248 sizeof(struct ifaliasreq));
249 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250 sizeof(struct sockaddr_in));
251 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253 nfsv3_diskless.root_fhsize = NFSX_MYFH;
254 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
256 nfsv3_diskless.root_fhsize = NFSX_V2FH;
257 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
259 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260 sizeof(struct sockaddr_in));
261 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262 nfsv3_diskless.root_time = nfs_diskless.root_time;
263 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
265 nfs_diskless_valid = 3;
/*
 * statfs operation for an NFS mount: fill *sbp from the server.
 *
 * Sequence visible here:
 *  - mark the mount busy with vfs_busy(mp, MBF_NOWAIT);
 *  - obtain the vnode for the mount file handle (nm_fh, nm_fhsize) with
 *    ncl_nget(), locked LK_EXCLUSIVE;
 *  - under nm_mtx, test whether this is an NFSv3 mount that has not yet
 *    obtained fsinfo; if so drop the mutex and issue nfsrpc_fsinfo();
 *  - issue nfsrpc_statfs();
 *  - nfsrpc_getattrnovp() is used to fetch attributes for the mount file
 *    handle, and a block of fixed default attributes (VDIR, mode 0777,
 *    fileid 2, and so on) is available as a fallback - the conditions that
 *    select these paths are not all visible, TODO confirm;
 *  - load the attributes into the vnode cache with nfscl_loadattrcache();
 *  - under nm_mtx, load fsinfo when it was fetched or the mount is NFSv4,
 *    load the statfs reply into sbp, and set sbp->f_iosize from
 *    newnfs_iosize();
 *  - when sbp is not the mount's own mnt_stat, copy the mounted-on and
 *    mounted-from names into it, then copy the file system type name;
 *  - for an NFSv4 vnode on the failure branch, map the error through
 *    nfscl_maperr().
 *
 * mp:  the mount being queried.
 * sbp: output buffer; may be &mp->mnt_stat itself.
 */
272 nfs_statfs(struct mount *mp, struct statfs *sbp)
276 struct nfsmount *nmp = VFSTONFS(mp);
277 struct nfsvattr nfsva;
280 int error = 0, attrflag, gotfsinfo = 0, ret;
285 error = vfs_busy(mp, MBF_NOWAIT);
288 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
294 mtx_lock(&nmp->nm_mtx);
295 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296 mtx_unlock(&nmp->nm_mtx);
297 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
302 mtx_unlock(&nmp->nm_mtx);
304 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
307 NFSCL_DEBUG(2, "statfs=%d\n", error);
309 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310 td->td_ucred, td, &nfsva, NULL, NULL);
313 * Just set default values to get things going.
315 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316 nfsva.na_vattr.va_type = VDIR;
317 nfsva.na_vattr.va_mode = 0777;
318 nfsva.na_vattr.va_nlink = 100;
319 nfsva.na_vattr.va_uid = (uid_t)0;
320 nfsva.na_vattr.va_gid = (gid_t)0;
321 nfsva.na_vattr.va_fileid = 2;
322 nfsva.na_vattr.va_gen = 1;
323 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324 nfsva.na_vattr.va_size = 512 * 1024;
327 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
329 mtx_lock(&nmp->nm_mtx);
330 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331 nfscl_loadfsinfo(nmp, &fs);
332 nfscl_loadsbinfo(nmp, &sb, sbp);
333 sbp->f_iosize = newnfs_iosize(nmp);
334 mtx_unlock(&nmp->nm_mtx);
335 if (sbp != &mp->mnt_stat) {
336 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
339 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340 } else if (NFS_ISV4(vp)) {
341 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
349 * nfs version 3 fsinfo rpc call
/*
 * Issue the fsinfo RPC for vp and record the reply in the mount structure.
 *
 * nfsrpc_fsinfo() is called with the supplied credential; returned
 * attributes are passed to nfscl_loadattrcache(), and the fsinfo reply is
 * stored with nfscl_loadfsinfo() while nm_mtx is held.
 *
 * nmp:  mount structure updated with the fsinfo reply.
 * vp:   vnode the RPC is issued against.
 * cred: credential passed to the RPC.
 */
352 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
356 struct nfsvattr nfsva;
359 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
362 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
364 mtx_lock(&nmp->nm_mtx);
365 nfscl_loadfsinfo(nmp, &fs);
366 mtx_unlock(&nmp->nm_mtx);
372 * Mount a remote root fs via NFS. This depends on the info in the
373 * nfs_diskless structure that has been filled in properly by some primary
375 * It goes something like this:
376 * - do enough of "ifconfig" by calling ifioctl() so that the system
377 * can talk to the server
378 * - If nfs_diskless.mygateway is filled in, use that address as
380 * - build the rootfs mount point and call mountnfs() to do the rest.
382 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383 * structure, as well as other global NFS client variables here, as
384 * nfs_mountroot() will be called once in the boot before any other NFS
385 * client activity occurs.
/*
 * Mount the root file system over NFS using the global nfsv3_diskless
 * description.  Runs on curthread.
 *
 * Steps visible here:
 *  - fill in the diskless data with bootpc_init() (BOOTP_NFSROOT and BOOTP
 *    both defined) or nfs_setup_diskless() (NFS_ROOT defined);
 *  - check nfs_diskless_valid: 0 is handled as a special case, and 1
 *    triggers nfs_convert_diskless();
 *  - create a socket of the interface address family and the root mount
 *    socket type, panicking if socreate() fails;
 *  - configure the interface with SIOCAIFADDR, panicking on failure (the
 *    loop over interface unit numbers is compiled out with #if 0);
 *  - when the boot.netif.mtu environment variable is set, parse it with
 *    strtol() and apply it with SIOCSIFMTU; a failure there is only
 *    printed;
 *  - when mygateway has a non-zero length and a non-zero address, add it
 *    as a gateway route with rtrequest_fib() in RT_DEFAULT_FIB, panicking
 *    on failure;
 *  - point root_args at the stored root file handle, format the server
 *    address as a dotted quad followed by a colon and root_hostnam, and
 *    mount it with nfs_mountdiskless();
 *  - copy my_hostnam into prison0.pr_hostname under prison0.pr_mtx and
 *    initialise the time of day from root_time.
 *
 * mp: the mount structure for the root file system.
 */
388 nfs_mountroot(struct mount *mp)
390 struct thread *td = curthread;
391 struct nfsv3_diskless *nd = &nfsv3_diskless;
400 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
401 bootpc_init(); /* use bootp to get nfs_diskless filled in */
402 #elif defined(NFS_ROOT)
403 nfs_setup_diskless();
406 if (nfs_diskless_valid == 0)
408 if (nfs_diskless_valid == 1)
409 nfs_convert_diskless();
412 * XXX splnet, so networks will receive...
417 * Do enough of ifconfig(8) so that the critical net interface can
418 * talk to the server.
420 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
423 panic("nfs_mountroot: socreate(%04x): %d",
424 nd->myif.ifra_addr.sa_family, error);
426 #if 0 /* XXX Bad idea */
428 * We might not have been told the right interface, so we pass
429 * over the first ten interfaces of the same kind, until we get
430 * one of them configured.
433 for (i = strlen(nd->myif.ifra_name) - 1;
434 nd->myif.ifra_name[i] >= '0' &&
435 nd->myif.ifra_name[i] <= '9';
436 nd->myif.ifra_name[i] ++) {
437 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
442 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445 if ((cp = getenv("boot.netif.mtu")) != NULL) {
446 ir.ifr_mtu = strtol(cp, NULL, 10);
447 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
449 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
451 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
456 * If the gateway field is filled in, set it as the default route.
457 * Note that pxeboot will set a default route of 0 if the route
458 * is not set by the DHCP server. Check also for a value of 0
459 * to avoid panicking inappropriately in that situation.
461 if (nd->mygateway.sin_len != 0 &&
462 nd->mygateway.sin_addr.s_addr != 0) {
463 struct sockaddr_in mask, sin;
465 bzero((caddr_t)&mask, sizeof(mask));
467 sin.sin_family = AF_INET;
468 sin.sin_len = sizeof(sin);
469 /* XXX MRT use table 0 for this sort of thing */
470 CURVNET_SET(TD_TO_VNET(td));
471 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472 (struct sockaddr *)&nd->mygateway,
473 (struct sockaddr *)&mask,
474 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
477 panic("nfs_mountroot: RTM_ADD: %d", error);
481 * Create the rootfs mount point.
483 nd->root_args.fh = nd->root_fh;
484 nd->root_args.fhsize = nd->root_fhsize;
485 l = ntohl(nd->root_saddr.sin_addr.s_addr);
486 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487 (l >> 24) & 0xff, (l >> 16) & 0xff,
488 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
489 printf("NFS ROOT: %s\n", buf);
490 nd->root_args.hostname = buf;
491 if ((error = nfs_mountdiskless(buf,
492 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
497 * This is not really an nfs issue, but it is much easier to
498 * set hostname here and then let the "/etc/rc.xxx" files
499 * mount the right /var based upon its preset value.
501 mtx_lock(&prison0.pr_mtx);
502 strlcpy(prison0.pr_hostname, nd->my_hostnam,
503 sizeof(prison0.pr_hostname));
504 mtx_unlock(&prison0.pr_mtx);
505 inittodr(ntohl(nd->root_time));
510 * Internal version of mount system call for diskless setup.
/*
 * Perform the NFS mount for the diskless root.
 *
 * path has the form host:directory.  The text after the first colon is
 * taken as the remote directory and its length is measured with strlen().
 * The server address is duplicated with sodupsockaddr(M_WAITOK) and passed
 * to mountnfs() together with the thread credential, no Kerberos names,
 * NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO and a minor version of 0.
 * A non-zero result from mountnfs() is reported with printf().
 *
 * path: host:directory string, also passed to mountnfs() as the host name.
 * sin:  server address.
 * args: mount arguments for the root mount.
 * td:   calling thread; its credential is used for the mount.
 * vpp:  passed through to mountnfs().
 * mp:   mount structure being set up.
 */
513 nfs_mountdiskless(char *path,
514 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515 struct vnode **vpp, struct mount *mp)
517 struct sockaddr *nam;
522 * Find the directory path in "path", which also has the server's
523 * name/ip address in it.
525 dirpath = strchr(path, ':');
527 dirlen = strlen(++dirpath);
530 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533 NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
/*
 * Translate a security flavor name into mount flag bits.
 *
 *   krb5  -> NFSMNT_KERB
 *   krb5i -> NFSMNT_KERB | NFSMNT_INTEGRITY
 *   krb5p -> NFSMNT_KERB | NFSMNT_PRIVACY
 *
 * The bits are OR-ed into *flagsp; any other string leaves *flagsp as it
 * was.  sec is compared with strcmp() and so must not be NULL.
 *
 * sec:    flavor name to translate.
 * flagsp: flag word updated in place.
 */
541 nfs_sec_name(char *sec, int *flagsp)
543 if (!strcmp(sec, "krb5"))
544 *flagsp |= NFSMNT_KERB;
545 else if (!strcmp(sec, "krb5i"))
546 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547 else if (!strcmp(sec, "krb5p"))
548 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
/*
 * Apply a struct nfs_args to a mount structure.
 *
 * Summary of the visible logic:
 *  - MNT_RDONLY is set on mp when the ro option is present and cleared
 *    when it is absent on an MNT_UPDATE.
 *  - For SOCK_STREAM, NFSMNT_NOCONN is cleared, nm_timeo is set to
 *    NFS_MAXTIMEO and nm_retry to INT_MAX (NFSv4) or NFS_RETRANS_TCP.
 *  - NFSMNT_RDIRPLUS is cleared in both argp and nmp when neither the
 *    NFSv3 nor the NFSv4 flag is requested.
 *  - adjsock records whether the socket needs rebuilding: a newly
 *    requested reserved port, a change of NFSMNT_NOCONN, or a change of
 *    socket type or protocol.
 *  - argp->flags is OR-ed into nm_flag.
 *  - timeo is scaled to NFS_HZ units and clamped to the range
 *    NFS_MINTIMEO..NFS_MAXTIMEO; retrans is honoured only when greater
 *    than 1 and is capped at NFS_MAXREXMIT.
 *  - wsize and rsize above NFS_FABLKSIZE are rounded down to a power of
 *    two using fls(); NFS_FABLKSIZE is the alternative value.
 *  - The four attribute-cache times take the supplied value when flagged
 *    and non-negative, with the NFS_*ATTRTIMO constants as alternatives;
 *    each minimum is then lowered to its maximum if it exceeds it.
 *  - readahead is capped at NFS_MAXRAHEAD; wcommitsize is raised to at
 *    least nm_wsize.
 *  - When a client handle already exists and adjsock is set, the
 *    connection is torn down with newnfs_disconnect() (taking the send
 *    lock for stream sockets), the new socket type and protocol are stored,
 *    and for SOCK_DGRAM newnfs_connect() is retried with a nap in between.
 *  - When hostname is not NULL it is copied into nm_hostname with
 *    strlcpy() and searched for a colon.
 *
 * mp:       mount whose flags and options are consulted and updated.
 * nmp:      NFS mount structure updated in place.
 * argp:     requested arguments; its flags member may be modified.
 * hostname: optional host name to record, or NULL.
 * cred, td: caller credential and thread.
 */
552 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553 const char *hostname, struct ucred *cred, struct thread *td)
562 * Set read-only flag if requested; otherwise, clear it if this is
563 * an update. If this is not an update, then either the read-only
564 * flag is already clear, or this is a root mount and it was set
565 * intentionally at some previous point.
567 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
569 mp->mnt_flag |= MNT_RDONLY;
571 } else if (mp->mnt_flag & MNT_UPDATE) {
573 mp->mnt_flag &= ~MNT_RDONLY;
578 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 * no sense in that context. Also, set up appropriate retransmit
580 * and soft timeout behavior.
582 if (argp->sotype == SOCK_STREAM) {
583 nmp->nm_flag &= ~NFSMNT_NOCONN;
584 nmp->nm_timeo = NFS_MAXTIMEO;
585 if ((argp->flags & NFSMNT_NFSV4) != 0)
586 nmp->nm_retry = INT_MAX;
588 nmp->nm_retry = NFS_RETRANS_TCP;
591 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 argp->flags &= ~NFSMNT_RDIRPLUS;
594 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597 /* Re-bind if rsrvd port requested and wasn't on one */
598 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599 && (argp->flags & NFSMNT_RESVPORT);
600 /* Also re-bind if we're switching to/from a connected UDP socket */
601 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602 (argp->flags & NFSMNT_NOCONN));
604 /* Update flags atomically. Don't change the lock bits. */
605 nmp->nm_flag = argp->flags | nmp->nm_flag;
608 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610 if (nmp->nm_timeo < NFS_MINTIMEO)
611 nmp->nm_timeo = NFS_MINTIMEO;
612 else if (nmp->nm_timeo > NFS_MAXTIMEO)
613 nmp->nm_timeo = NFS_MAXTIMEO;
616 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617 nmp->nm_retry = argp->retrans;
618 if (nmp->nm_retry > NFS_MAXREXMIT)
619 nmp->nm_retry = NFS_MAXREXMIT;
622 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623 nmp->nm_wsize = argp->wsize;
625 * Clip at the power of 2 below the size. There is an
626 * issue (not isolated) that causes intermittent page
627 * faults if this is not done.
629 if (nmp->nm_wsize > NFS_FABLKSIZE)
630 nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
632 nmp->nm_wsize = NFS_FABLKSIZE;
635 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636 nmp->nm_rsize = argp->rsize;
638 * Clip at the power of 2 below the size. There is an
639 * issue (not isolated) that causes intermittent page
640 * faults if this is not done.
642 if (nmp->nm_rsize > NFS_FABLKSIZE)
643 nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
645 nmp->nm_rsize = NFS_FABLKSIZE;
648 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649 nmp->nm_readdirsize = argp->readdirsize;
652 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653 nmp->nm_acregmin = argp->acregmin;
655 nmp->nm_acregmin = NFS_MINATTRTIMO;
656 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657 nmp->nm_acregmax = argp->acregmax;
659 nmp->nm_acregmax = NFS_MAXATTRTIMO;
660 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661 nmp->nm_acdirmin = argp->acdirmin;
663 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665 nmp->nm_acdirmax = argp->acdirmax;
667 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669 nmp->nm_acdirmin = nmp->nm_acdirmax;
670 if (nmp->nm_acregmin > nmp->nm_acregmax)
671 nmp->nm_acregmin = nmp->nm_acregmax;
673 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674 if (argp->readahead <= NFS_MAXRAHEAD)
675 nmp->nm_readahead = argp->readahead;
677 nmp->nm_readahead = NFS_MAXRAHEAD;
679 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680 if (argp->wcommitsize < nmp->nm_wsize)
681 nmp->nm_wcommitsize = nmp->nm_wsize;
683 nmp->nm_wcommitsize = argp->wcommitsize;
686 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687 (nmp->nm_soproto != argp->proto));
689 if (nmp->nm_client != NULL && adjsock) {
690 int haslock = 0, error = 0;
692 if (nmp->nm_sotype == SOCK_STREAM) {
693 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
698 newnfs_disconnect(&nmp->nm_sockreq);
700 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701 nmp->nm_sotype = argp->sotype;
702 nmp->nm_soproto = argp->proto;
703 if (nmp->nm_sotype == SOCK_DGRAM)
704 while (newnfs_connect(nmp, &nmp->nm_sockreq,
706 printf("newnfs_args: retrying connect\n");
707 (void) nfs_catnap(PSOCK, 0, "newnfscon");
711 nmp->nm_sotype = argp->sotype;
712 nmp->nm_soproto = argp->proto;
715 if (hostname != NULL) {
716 strlcpy(nmp->nm_hostname, hostname,
717 sizeof(nmp->nm_hostname));
718 p = strchr(nmp->nm_hostname, ':');
/*
 * Mount option names accepted by this file system.  nfs_mount() passes
 * this table to vfs_filteropt() together with mp->mnt_optnew before it
 * looks at any individual option.
 */
724 static const char *nfs_opts[] = { "from", "nfs_args",
725 "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
726 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
727 "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
728 "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
729 "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
730 "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
731 "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
732 "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
733 "pnfs", "wcommitsize",
740 * It seems a bit dumb to copyinstr() the host and path here and then
741 * bcopy() them in mountnfs(), but I wanted to detect errors before
742 * doing the sockargs() call because sockargs() allocates an mbuf and
743 * an error after that means that I have to release the mbuf.
/*
 * mount operation for the NFS client.
 *
 * A struct nfs_args is built in three layers: the compiled-in defaults in
 * the initializer below, then an optional binary "nfs_args" option (which
 * must carry NFS_ARGSVERSION), then the individual string options found in
 * mp->mnt_optnew.  Numeric options are parsed with sscanf() and rejected
 * through vfs_mount_error() when they do not parse or are out of range.
 *
 * A root mount that is not an update is handed to nfs_mountroot().  An
 * MNT_UPDATE request is applied to the existing mount with
 * nfs_decode_args().  Otherwise the file handle, host name, Kerberos
 * principal names, directory path and server address are collected (copied
 * in from user space in the "nfs_args" case, taken from the option list
 * otherwise) and the mount is completed by mountnfs().
 *
 * mp: the mount being created or updated; options are read from
 *     mp->mnt_optnew.
 */
747 nfs_mount(struct mount *mp)
749 struct nfs_args args = {
750 .version = NFS_ARGSVERSION,
752 .addrlen = sizeof (struct sockaddr_in),
753 .sotype = SOCK_STREAM,
757 .flags = NFSMNT_RESVPORT,
760 .readdirsize = NFS_READDIRSIZE,
762 .retrans = NFS_RETRANS,
763 .readahead = NFS_DEFRAHEAD,
764 .wcommitsize = 0, /* was: NQ_DEFLEASE */
766 .acregmin = NFS_MINATTRTIMO,
767 .acregmax = NFS_MAXATTRTIMO,
768 .acdirmin = NFS_MINDIRATTRTIMO,
769 .acdirmax = NFS_MAXDIRATTRTIMO,
771 int error = 0, ret, len;
772 struct sockaddr *nam = NULL;
776 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777 char *opt, *name, *secname;
778 int nametimeo = NFS_DEFAULT_NAMETIMEO;
779 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
781 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
784 has_nfs_args_opt = 0;
785 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
791 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792 error = nfs_mountroot(mp);
799 * The old mount_nfs program passed the struct nfs_args
800 * from userspace to kernel. The new mount_nfs program
801 * passes string options via nmount() from userspace to kernel
802 * and we populate the struct nfs_args in the kernel.
804 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
810 if (args.version != NFS_ARGSVERSION) {
811 error = EPROGMISMATCH;
814 has_nfs_args_opt = 1;
817 /* Handle the new style options. */
818 if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819 args.acdirmin = args.acdirmax =
820 args.acregmin = args.acregmax = 0;
821 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
824 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825 args.flags |= NFSMNT_NOCONN;
826 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827 args.flags &= ~NFSMNT_NOCONN;
828 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829 args.flags |= NFSMNT_NOLOCKD;
830 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831 args.flags &= ~NFSMNT_NOLOCKD;
832 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833 args.flags |= NFSMNT_INT;
834 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835 args.flags |= NFSMNT_RDIRPLUS;
836 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837 args.flags |= NFSMNT_RESVPORT;
838 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839 args.flags &= ~NFSMNT_RESVPORT;
840 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841 args.flags |= NFSMNT_SOFT;
842 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843 args.flags &= ~NFSMNT_SOFT;
844 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845 args.sotype = SOCK_DGRAM;
846 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847 args.sotype = SOCK_DGRAM;
848 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849 args.sotype = SOCK_STREAM;
850 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851 args.flags |= NFSMNT_NFSV3;
852 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853 args.flags |= NFSMNT_NFSV4;
854 args.sotype = SOCK_STREAM;
856 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857 args.flags |= NFSMNT_ALLGSSNAME;
858 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859 args.flags |= NFSMNT_NOCTO;
860 if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861 args.flags |= NFSMNT_NONCONTIGWR;
862 if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863 args.flags |= NFSMNT_PNFS;
864 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
866 vfs_mount_error(mp, "illegal readdirsize");
870 ret = sscanf(opt, "%d", &args.readdirsize);
871 if (ret != 1 || args.readdirsize <= 0) {
872 vfs_mount_error(mp, "illegal readdirsize: %s",
877 args.flags |= NFSMNT_READDIRSIZE;
879 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
881 vfs_mount_error(mp, "illegal readahead");
885 ret = sscanf(opt, "%d", &args.readahead);
886 if (ret != 1 || args.readahead <= 0) {
887 vfs_mount_error(mp, "illegal readahead: %s",
892 args.flags |= NFSMNT_READAHEAD;
894 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
896 vfs_mount_error(mp, "illegal wsize");
900 ret = sscanf(opt, "%d", &args.wsize);
901 if (ret != 1 || args.wsize <= 0) {
902 vfs_mount_error(mp, "illegal wsize: %s",
907 args.flags |= NFSMNT_WSIZE;
909 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
911 vfs_mount_error(mp, "illegal rsize");
915 ret = sscanf(opt, "%d", &args.rsize);
916 if (ret != 1 || args.rsize <= 0) {
/* Report the option that actually failed: this is the rsize branch. */
917 vfs_mount_error(mp, "illegal rsize: %s",
922 args.flags |= NFSMNT_RSIZE;
924 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
926 vfs_mount_error(mp, "illegal retrans");
930 ret = sscanf(opt, "%d", &args.retrans);
931 if (ret != 1 || args.retrans <= 0) {
932 vfs_mount_error(mp, "illegal retrans: %s",
937 args.flags |= NFSMNT_RETRANS;
939 if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940 ret = sscanf(opt, "%d", &args.acregmin);
941 if (ret != 1 || args.acregmin < 0) {
942 vfs_mount_error(mp, "illegal actimeo: %s",
947 args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
951 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952 ret = sscanf(opt, "%d", &args.acregmin);
953 if (ret != 1 || args.acregmin < 0) {
954 vfs_mount_error(mp, "illegal acregmin: %s",
959 args.flags |= NFSMNT_ACREGMIN;
961 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962 ret = sscanf(opt, "%d", &args.acregmax);
963 if (ret != 1 || args.acregmax < 0) {
964 vfs_mount_error(mp, "illegal acregmax: %s",
969 args.flags |= NFSMNT_ACREGMAX;
971 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972 ret = sscanf(opt, "%d", &args.acdirmin);
973 if (ret != 1 || args.acdirmin < 0) {
974 vfs_mount_error(mp, "illegal acdirmin: %s",
979 args.flags |= NFSMNT_ACDIRMIN;
981 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982 ret = sscanf(opt, "%d", &args.acdirmax);
983 if (ret != 1 || args.acdirmax < 0) {
984 vfs_mount_error(mp, "illegal acdirmax: %s",
989 args.flags |= NFSMNT_ACDIRMAX;
991 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992 ret = sscanf(opt, "%d", &args.wcommitsize);
993 if (ret != 1 || args.wcommitsize < 0) {
994 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
998 args.flags |= NFSMNT_WCOMMITSIZE;
1000 if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001 ret = sscanf(opt, "%d", &args.timeo);
1002 if (ret != 1 || args.timeo <= 0) {
1003 vfs_mount_error(mp, "illegal timeo: %s",
1008 args.flags |= NFSMNT_TIMEO;
1010 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011 ret = sscanf(opt, "%d", &args.timeo);
1012 if (ret != 1 || args.timeo <= 0) {
1013 vfs_mount_error(mp, "illegal timeout: %s",
1018 args.flags |= NFSMNT_TIMEO;
1020 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021 ret = sscanf(opt, "%d", &nametimeo);
1022 if (ret != 1 || nametimeo < 0) {
1023 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1028 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1030 ret = sscanf(opt, "%d", &negnametimeo);
1031 if (ret != 1 || negnametimeo < 0) {
1032 vfs_mount_error(mp, "illegal negnametimeo: %s",
1038 if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1040 ret = sscanf(opt, "%d", &minvers);
1041 if (ret != 1 || minvers < 0 || minvers > 1 ||
1042 (args.flags & NFSMNT_NFSV4) == 0) {
1043 vfs_mount_error(mp, "illegal minorversion: %s", opt);
1048 if (vfs_getopt(mp->mnt_optnew, "sec",
1049 (void **) &secname, NULL) == 0)
1050 nfs_sec_name(secname, &args.flags);
1052 if (mp->mnt_flag & MNT_UPDATE) {
1053 struct nfsmount *nmp = VFSTONFS(mp);
1061 * If a change from TCP->UDP is done and there are thread(s)
1062 * that have I/O RPC(s) in progress with a transfer size
1063 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064 * hung, retrying the RPC(s) forever. Usually these threads
1065 * will be seen doing an uninterruptible sleep on wait channel
1066 * "newnfsreq" (truncated to "newnfsre" by procstat).
1068 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069 tprintf(td->td_proc, LOG_WARNING,
1070 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1073 * When doing an update, we can't change version,
1074 * security, switch lockd strategies or change cookie
1077 args.flags = (args.flags &
1083 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1090 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1096 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097 * or no-connection mode for those protocols that support
1098 * no-connection mode (the flag will be cleared later for protocols
1099 * that do not support no-connection mode). This will allow a client
1100 * to receive replies from a different IP than the request was
1101 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1104 if (nfs_ip_paranoia == 0)
1105 args.flags |= NFSMNT_NOCONN;
1107 if (has_nfs_args_opt != 0) {
1109 * In the 'nfs_args' case, the pointers in the args
1110 * structure are in userland - we copy them in here.
1112 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113 vfs_mount_error(mp, "Bad file handle");
1117 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1121 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1124 bzero(&hst[hstlen], MNAMELEN - hstlen);
1125 args.hostname = hst;
1126 /* sockargs() call must be after above copyin() calls */
1127 error = getsockaddr(&nam, (caddr_t)args.addr,
1132 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133 &args.fhsize) == 0) {
1134 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135 vfs_mount_error(mp, "Bad file handle");
1139 bcopy(args.fh, nfh, args.fhsize);
1143 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1144 (void **)&args.hostname, &len);
1145 if (args.hostname == NULL) {
1146 vfs_mount_error(mp, "Invalid hostname");
/*
 * NOTE(review): the copy below always reads MNAMELEN bytes from the option
 * buffer and does not consult the option length returned in len - confirm
 * that the buffer is guaranteed to be at least MNAMELEN bytes long.
 */
1150 bcopy(args.hostname, hst, MNAMELEN);
1151 hst[MNAMELEN - 1] = '\0';
1154 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1157 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158 srvkrbnamelen = strlen(srvkrbname);
1160 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1161 strlcpy(krbname, name, sizeof (krbname));
1164 krbnamelen = strlen(krbname);
1166 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1167 strlcpy(dirpath, name, sizeof (dirpath));
1170 dirlen = strlen(dirpath);
1172 if (has_nfs_args_opt == 0) {
1173 if (vfs_getopt(mp->mnt_optnew, "addr",
1174 (void **)&args.addr, &args.addrlen) == 0) {
1175 if (args.addrlen > SOCK_MAXADDRLEN) {
1176 error = ENAMETOOLONG;
1179 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1180 bcopy(args.addr, nam, args.addrlen);
1181 nam->sa_len = args.addrlen;
1183 vfs_mount_error(mp, "No server address");
1190 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1191 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1192 nametimeo, negnametimeo, minvers);
1196 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1208 * It seems a bit dumb to copyinstr() the host and path here and then
1209 * bcopy() them in mountnfs(), but I wanted to detect errors before
1210 * doing the sockargs() call because sockargs() allocates an mbuf and
1211 * an error after that means that I have to release the mbuf.
/*
 * Mount entry point that takes a binary struct nfs_args at `data'.
 * The structure is fetched with copyin(), added to the mount argument
 * list `ma' under the option name "nfs_args", and the list is then
 * passed to kernel_mount() together with `flags'.
 * NOTE(review): the error check after copyin() and the return statement
 * are not visible in this extract; confirm against the full file.
 */
1215 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1218 struct nfs_args args;
1220 error = copyin(data, &args, sizeof (struct nfs_args));
/* Attach the whole structure as a single option named "nfs_args". */
1224 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1226 error = kernel_mount(ma, flags);
1231 * Common code for mount and mountroot
/*
 * Build and initialize the struct nfsmount for `mp', connect to the
 * server and set up the vnode handed back through `vpp'.
 *
 * argp       - mount arguments; flags, sotype, proto, fh and fhsize are
 *              read here and the structure is passed to nfs_decode_args().
 * mp         - mount point being set up; stored in nm_mountp.
 * nam        - server socket address; released with FREE(nam, M_SONAME)
 *              on the paths visible at the top and bottom of the function.
 * hst        - host string; MNAMELEN bytes are copied into
 *              mp->mnt_stat.f_mntfromname.
 * krbname, dirpath, srvkrbname (with their lengths)
 *            - copied into the storage allocated behind the nfsmount.
 * vpp        - vnode pointer for the mount's root; used for the attribute
 *              load, ncl_fsinfo() and the final unlock.
 * cred, td   - credential and thread used for the connect and the RPCs.
 * nametimeo, negnametimeo
 *            - stored as defaults before nfs_decode_args() runs.
 *
 * Visible sequence: allocate and fill the nfsmount, apply defaults, call
 * nfs_decode_args(), newnfs_connect(), nfscl_getcl() when nm_minorvers
 * is above 0, nfsrpc_getdirpath() for an NFSv4 mount that has no file
 * handle but has a directory path, then ncl_nget() and attribute setup
 * when a file handle is present.
 * NOTE(review): short lines (braces, else/goto/return statements, labels
 * and the last parameter line) are not visible in this extract; confirm
 * the exact control flow against the full file.
 */
1234 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1235 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1236 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1237 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1240 struct nfsmount *nmp;
1242 int error, trycnt, ret;
1243 struct nfsvattr nfsva;
1244 struct nfsclclient *clp;
1245 struct nfsclds *dsp, *tdsp;
1247 static u_int64_t clval = 0;
1249 NFSCL_DEBUG(3, "in mnt\n");
/* An update request is only logged here and the address is released. */
1251 if (mp->mnt_flag & MNT_UPDATE) {
1253 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1254 FREE(nam, M_SONAME);
/*
 * Zeroed allocation: the structure itself plus room sized by the three
 * name lengths and 2 further bytes.
 */
1257 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1258 krbnamelen + dirlen + srvkrbnamelen + 2,
1259 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1260 TAILQ_INIT(&nmp->nm_bufq);
1262 clval = (u_int64_t)nfsboottime.tv_sec;
/* Each mount takes the current counter value; the counter then advances. */
1263 nmp->nm_clval = clval++;
1264 nmp->nm_krbnamelen = krbnamelen;
1265 nmp->nm_dirpathlen = dirlen;
1266 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1267 if (td->td_ucred->cr_uid != (uid_t)0) {
1269 * nm_uid is used to get KerberosV credentials for
1270 * the nfsv4 state handling operations if there is
1271 * no host based principal set. Use the uid of
1272 * this user if not root, since they are doing the
1273 * mount. I don't think setting this for root will
1274 * work, since root normally does not have user
1275 * credentials in a credentials cache.
1277 nmp->nm_uid = td->td_ucred->cr_uid;
1280 * Just set to -1, so it won't be used.
1282 nmp->nm_uid = (uid_t)-1;
1285 /* Copy and null terminate all the names */
1286 if (nmp->nm_krbnamelen > 0) {
1287 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1288 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1290 if (nmp->nm_dirpathlen > 0) {
1291 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1292 nmp->nm_dirpathlen);
1293 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1296 if (nmp->nm_srvkrbnamelen > 0) {
1297 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1298 nmp->nm_srvkrbnamelen);
1299 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1300 + nmp->nm_srvkrbnamelen + 2] = '\0';
/*
 * Take a reference on the caller's credential for the socket request;
 * the teardown code at the end of this function drops it with crfree().
 */
1302 nmp->nm_sockreq.nr_cred = crhold(cred);
1303 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1305 nmp->nm_getinfo = nfs_getnlminfo;
1306 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1309 nmp->nm_mountp = mp;
1310 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1313 * Since nfs_decode_args() might optionally set them, these
1314 * need to be set to defaults before the call, so that the
1315 * optional settings aren't overwritten.
1317 nmp->nm_nametimeo = nametimeo;
1318 nmp->nm_negnametimeo = negnametimeo;
1319 nmp->nm_timeo = NFS_TIMEO;
1320 nmp->nm_retry = NFS_RETRANS;
1321 nmp->nm_readahead = NFS_DEFRAHEAD;
1323 /* This is empirical approximation of sqrt(hibufspace) * 256. */
1324 nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1325 while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1326 nmp->nm_wcommitsize *= 2;
1327 nmp->nm_wcommitsize *= 256;
/* The requested minor version is used for NFSv4 mounts; 0 is the other value. */
1329 if ((argp->flags & NFSMNT_NFSV4) != 0)
1330 nmp->nm_minorvers = minvers;
1332 nmp->nm_minorvers = 0;
1334 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1337 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1338 * high, depending on whether we end up with negative offsets in
1339 * the client or server somewhere. 2GB-1 may be safer.
1341 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1342 * that we can handle until we find out otherwise.
1344 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1345 nmp->nm_maxfilesize = 0xffffffffLL;
1347 nmp->nm_maxfilesize = OFF_MAX;
1349 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1350 nmp->nm_wsize = NFS_WSIZE;
1351 nmp->nm_rsize = NFS_RSIZE;
1352 nmp->nm_readdirsize = NFS_READDIRSIZE;
1354 nmp->nm_numgrps = NFS_MAXGRPS;
1355 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1356 if (nmp->nm_tprintf_delay < 0)
1357 nmp->nm_tprintf_delay = 0;
1358 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1359 if (nmp->nm_tprintf_initial_delay < 0)
1360 nmp->nm_tprintf_initial_delay = 0;
/* Keep the file handle from the arguments, when one was supplied. */
1361 nmp->nm_fhsize = argp->fhsize;
1362 if (nmp->nm_fhsize > 0)
1363 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1364 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1366 /* Set up the sockets and per-host congestion */
1367 nmp->nm_sotype = argp->sotype;
1368 nmp->nm_soproto = argp->proto;
1369 nmp->nm_sockreq.nr_prog = NFS_PROG;
/* The RPC version follows the mount flags: V4, then V3, otherwise V2. */
1370 if ((argp->flags & NFSMNT_NFSV4))
1371 nmp->nm_sockreq.nr_vers = NFS_VER4;
1372 else if ((argp->flags & NFSMNT_NFSV3))
1373 nmp->nm_sockreq.nr_vers = NFS_VER3;
1375 nmp->nm_sockreq.nr_vers = NFS_VER2;
1378 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1380 /* For NFSv4.1, get the clientid now. */
1381 if (nmp->nm_minorvers > 0) {
1382 NFSCL_DEBUG(3, "at getcl\n");
1383 error = nfscl_getcl(mp, cred, td, 0, &clp);
1384 NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1389 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1390 nmp->nm_dirpathlen > 0) {
1391 NFSCL_DEBUG(3, "in dirp\n");
1393 * If the fhsize on the mount point == 0 for V4, the mount
1394 * path needs to be looked up.
1398 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1400 NFSCL_DEBUG(3, "aft dirp=%d\n", error);
/* Nap, then repeat the lookup while it fails and trycnt has not run out. */
1402 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1403 } while (error && --trycnt > 0);
1405 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1411 * A reference count is needed on the nfsnode representing the
1412 * remote root. If this object is not persistent, then backward
1413 * traversals of the mount point (i.e. "..") will not work if
1414 * the nfsnode gets flushed out of the cache. Ufs does not have
1415 * this problem, because one can identify root inodes by their
1416 * number == ROOTINO (2).
1418 if (nmp->nm_fhsize > 0) {
1420 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1421 * non-zero for the root vnode. f_iosize will be set correctly
1422 * by nfs_statfs() before any I/O occurs.
1424 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1425 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1432 * Get file attributes and transfer parameters for the
1433 * mountpoint. This has the side effect of filling in
1434 * (*vpp)->v_type with the correct value.
1436 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1437 cred, td, &nfsva, NULL, &lease);
1440 * Just set default values to get things going.
1442 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1443 nfsva.na_vattr.va_type = VDIR;
1444 nfsva.na_vattr.va_mode = 0777;
1445 nfsva.na_vattr.va_nlink = 100;
1446 nfsva.na_vattr.va_uid = (uid_t)0;
1447 nfsva.na_vattr.va_gid = (gid_t)0;
1448 nfsva.na_vattr.va_fileid = 2;
1449 nfsva.na_vattr.va_gen = 1;
1450 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1451 nfsva.na_vattr.va_size = 512 * 1024;
1454 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
/*
 * With a minor version above 0: derive the renew and expiry times from
 * the lease, advance the clientid revision (never leaving it at 0),
 * start the renew thread and release the client structure.
 */
1455 if (nmp->nm_minorvers > 0) {
1456 NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1458 clp->nfsc_renew = NFSCL_RENEW(lease);
1459 clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1460 clp->nfsc_clientidrev++;
1461 if (clp->nfsc_clientidrev == 0)
1462 clp->nfsc_clientidrev++;
1465 * Mount will succeed, so the renew thread can be
1468 nfscl_start_renewthread(clp);
1469 nfscl_clientrelease(clp);
1471 if (argp->flags & NFSMNT_NFSV3)
1472 ncl_fsinfo(nmp, *vpp, cred, td);
1474 /* Mark if the mount point supports NFSv4 ACLs. */
1475 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1477 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1479 mp->mnt_flag |= MNT_NFS4ACLS;
1484 * Lose the lock but keep the ref.
1486 NFSVOPUNLOCK(*vpp, 0);
/*
 * Teardown of everything set up above: client reference, connection,
 * credential, auth handle, both mutexes, the client structure, the
 * per-session entries, the nfsmount and the address.
 * NOTE(review): presumably reached only on failure; the label and the
 * guarding tests are not visible in this extract.
 */
1493 nfscl_clientrelease(clp);
1494 newnfs_disconnect(&nmp->nm_sockreq);
1495 crfree(nmp->nm_sockreq.nr_cred);
1496 if (nmp->nm_sockreq.nr_auth != NULL)
1497 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1498 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1499 mtx_destroy(&nmp->nm_mtx);
1500 if (nmp->nm_clp != NULL) {
1502 LIST_REMOVE(nmp->nm_clp, nfsc_list);
1504 free(nmp->nm_clp, M_NFSCLCLIENT);
1506 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1507 nfscl_freenfsclds(dsp);
1508 FREE(nmp, M_NEWNFSMNT);
1509 FREE(nam, M_SONAME);
1514 * unmount system call
/*
 * Unmount handler for the mount `mp'.
 *
 * mntflags - MNT_FORCE selects a forced unmount: outstanding requests are
 *            cancelled, nfscl_umount() is called before the vnode flush,
 *            and vflush() is retried up to 30 times.  Without MNT_FORCE,
 *            nfscl_umount() is called after the flush instead.
 *
 * After the flush the routine detaches any nfsiod slots that point at
 * this mount and releases the connection, credential, address, auth
 * handle, mutexes, per-session entries and the nfsmount itself.
 * NOTE(review): the assignments of nmp/td, the error checks and the
 * return statements are not visible in this extract.
 */
1517 nfs_unmount(struct mount *mp, int mntflags)
1520 struct nfsmount *nmp;
1521 int error, flags = 0, i, trycnt = 0;
1522 struct nfsclds *dsp, *tdsp;
/* A forced unmount is passed on to vflush() as FORCECLOSE. */
1526 if (mntflags & MNT_FORCE)
1527 flags |= FORCECLOSE;
1530 * Goes something like this..
1531 * - Call vflush() to clear out vnodes for this filesystem
1532 * - Close the socket
1533 * - Free up the data structures
1535 /* In the forced case, cancel any outstanding requests. */
1536 if (mntflags & MNT_FORCE) {
1537 error = newnfs_nmcancelreqs(nmp);
1540 /* For a forced close, get rid of the renew thread now */
1541 nfscl_umount(nmp, td);
1543 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
/*
 * Only a forced unmount repeats the flush: while it keeps failing the
 * thread naps and tries again, giving up once trycnt reaches 30.
 */
1545 error = vflush(mp, 1, flags, td);
1546 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1547 (void) nfs_catnap(PSOCK, error, "newndm");
1548 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1553 * We are now committed to the unmount.
1555 if ((mntflags & MNT_FORCE) == 0)
1556 nfscl_umount(nmp, td);
1557 /* Make sure no nfsiods are assigned to this mount. */
1558 mtx_lock(&ncl_iod_mutex);
1559 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1560 if (ncl_iodmount[i] == nmp) {
1561 ncl_iodwant[i] = NFSIOD_AVAILABLE;
1562 ncl_iodmount[i] = NULL;
1564 mtx_unlock(&ncl_iod_mutex);
/* Release everything the mount owns; nmp must not be used afterwards. */
1565 newnfs_disconnect(&nmp->nm_sockreq);
1566 crfree(nmp->nm_sockreq.nr_cred);
1567 FREE(nmp->nm_nam, M_SONAME);
1568 if (nmp->nm_sockreq.nr_auth != NULL)
1569 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1570 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1571 mtx_destroy(&nmp->nm_mtx);
1572 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1573 nfscl_freenfsclds(dsp);
1574 FREE(nmp, M_NEWNFSMNT);
1580 * Return root of a filesystem
/*
 * Root-vnode handler for the mount `mp'.
 *
 * flags - passed through to ncl_nget() when looking up the nfsnode for
 *         the root file handle stored in the nfsmount.
 * vpp   - output vnode pointer (its assignment is not visible in this
 *         extract).
 *
 * For an NFSv3 mount that has not yet obtained fsinfo, ncl_fsinfo() is
 * called with the current thread and its credential.  The vnode is
 * flagged VV_ROOT.
 */
1583 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1586 struct nfsmount *nmp;
1591 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1596 * Get transfer parameters and attributes for root vnode once.
1598 mtx_lock(&nmp->nm_mtx);
/* The mount mutex is dropped before ncl_fsinfo() is called. */
1599 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1600 mtx_unlock(&nmp->nm_mtx);
1601 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1603 mtx_unlock(&nmp->nm_mtx);
1604 if (vp->v_type == VNON)
1606 vp->v_vflag |= VV_ROOT;
1612 * Flush out the buffer cache
/*
 * Sync handler for the mount `mp': iterates over all of its vnodes and
 * calls VOP_FSYNC() on those that pass the filter below.
 *
 * waitfor - handed to VOP_FSYNC(); the filter also compares it with
 *           MNT_LAZY.
 *
 * The filter tests for a locked vnode, an empty dirty-buffer list or a
 * MNT_LAZY request.
 * NOTE(review): the body of the filter branch, the handling of
 * error/allerror and the return statements are not visible in this
 * extract; presumably filtered vnodes are skipped.
 */
1616 nfs_sync(struct mount *mp, int waitfor)
1618 struct vnode *vp, *mvp;
1620 int error, allerror = 0;
1626 * If a forced dismount is in progress, return from here so that
1627 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1628 * calling VFS_UNMOUNT().
1630 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1637 * Force stale buffer cache information to be flushed.
1640 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1641 /* XXX Racy bv_cnt check. */
1642 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1643 waitfor == MNT_LAZY) {
/* If the vnode cannot be acquired, the iteration is abandoned. */
1647 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1648 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
/* The vnode is locked exclusively here; it is unlocked again below. */
1651 error = VOP_FSYNC(vp, waitfor, td);
1654 NFSVOPUNLOCK(vp, 0);
/*
 * Per-mount sysctl handler: `op' selects the operation and `req' carries
 * the old-value and new-value buffers.
 *
 * Handling visible in this extract:
 *  - VFS_CTL_NOLOCKS: reports 1 or 0 according to NFSMNT_NOLOCKS in
 *    nm_flag; when a new value is supplied it is read into `val' and the
 *    flag is set or cleared (the test choosing between the two is hidden).
 *  - a query that copies out `vq', whose vq_flags gets VQ_NOTRESP when
 *    NFSSTA_TIMEO is set in nm_state.
 *  - copying nm_tprintf_initial_delay out and in; vfs_suser() is called
 *    before a new value is read and a negative value is reset to 0.
 * NOTE(review): the switch statement, the remaining case labels and any
 * preprocessor conditionals are not visible here, so which of these
 * paths are actually compiled in must be confirmed against the full file.
 */
1661 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1663 struct nfsmount *nmp = VFSTONFS(mp);
1667 bzero(&vq, sizeof(vq));
1670 case VFS_CTL_NOLOCKS:
1671 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1672 if (req->oldptr != NULL) {
1673 error = SYSCTL_OUT(req, &val, sizeof(val));
1677 if (req->newptr != NULL) {
1678 error = SYSCTL_IN(req, &val, sizeof(val));
1682 nmp->nm_flag |= NFSMNT_NOLOCKS;
1684 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
/* nm_state is sampled while holding the mount mutex. */
1689 mtx_lock(&nmp->nm_mtx);
1690 if (nmp->nm_state & NFSSTA_TIMEO)
1691 vq.vq_flags |= VQ_NOTRESP;
1692 mtx_unlock(&nmp->nm_mtx);
1694 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1695 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1696 vq.vq_flags |= VQ_NOTRESPLOCK;
1698 error = SYSCTL_OUT(req, &vq, sizeof(vq));
/* Read side first, then the optional write of the initial tprintf delay. */
1701 if (req->oldptr != NULL) {
1702 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1703 sizeof(nmp->nm_tprintf_initial_delay));
1707 if (req->newptr != NULL) {
1708 error = vfs_suser(mp, req->td);
1711 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1712 sizeof(nmp->nm_tprintf_initial_delay));
1715 if (nmp->nm_tprintf_initial_delay < 0)
1716 nmp->nm_tprintf_initial_delay = 0;
1726 * Purge any RPCs in progress, so that they will all return errors.
1727 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
/*
 * Purge handler for the mount `mp': looks up the mount's nfsmount with
 * VFSTONFS() and calls newnfs_nmcancelreqs() on it.  The result of that
 * call is not used.
 */
1731 nfs_purge(struct mount *mp)
1733 struct nfsmount *nmp = VFSTONFS(mp);
1735 newnfs_nmcancelreqs(nmp);
1739 * Extract the information needed by the nlm from the nfs vnode.
/*
 * Report details of the NFS vnode `vp' and its mount to the caller.
 *
 * fhp, fhlenp - receive the file handle bytes and their length, taken
 *               from the vnode's nfsnode (np->n_fhp).
 * sp          - receives the mount's server address (nm_nam); the copy
 *               is limited to the smaller of sa_len and sizeof(*sp).
 * is_v3p      - receives NFS_ISV3(vp).
 * sizep       - receives the nfsnode's n_size.
 * timeop      - when not NULL, receives nm_timeo converted from units of
 *               1/NFS_HZ second into seconds and microseconds.
 *
 * NOTE(review): only the NULL test for timeop is visible in this
 * extract; confirm whether the other output pointers are optional too.
 */
1742 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1743 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1744 struct timeval *timeop)
1746 struct nfsmount *nmp;
1747 struct nfsnode *np = VTONFS(vp);
1749 nmp = VFSTONFS(vp->v_mount);
1751 *fhlenp = (size_t)np->n_fhp->nfh_len;
1753 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
/* Never copy more than the destination sockaddr_storage can hold. */
1755 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1757 *is_v3p = NFS_ISV3(vp);
1759 *sizep = np->n_size;
1760 if (timeop != NULL) {
/* Whole seconds first, then the remaining ticks scaled to microseconds. */
1761 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1762 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1767 * This function prints out an option name, based on the conditional
/*
 * Write the option string `opt' at *buf when `testval' is non-zero.
 *
 * buf  - address of the current write position in the output buffer.
 * blen - address of the number of bytes available at *buf; nothing is
 *        written unless *blen is strictly greater than strlen(opt), so
 *        the string and its terminating NUL fit.
 * `nmp' is not referenced by the lines visible here.
 *
 * NOTE(review): what happens when snprintf()'s result differs from
 * strlen(opt), and how *buf / *blen are advanced, is not visible in this
 * extract; confirm against the full file.
 */
1770 static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
1771 char *opt, char **buf, size_t *blen)
1775 if (testval != 0 && *blen > strlen(opt)) {
1776 len = snprintf(*buf, *blen, "%s", opt);
1777 if (len != strlen(opt))
1785 * This function prints out an option's integer value.
/*
 * Write "<opt>=<optval>" at *buf, with optval formatted as a decimal int.
 *
 * buf  - address of the current write position in the output buffer.
 * blen - address of the number of bytes available at *buf.
 * `nmp' is not referenced by the lines visible here.
 *
 * The guard only requires space for the option name and one more
 * character, so snprintf() may cut the digits short when the remaining
 * space is small.
 * NOTE(review): the update of *buf / *blen after the call is not visible
 * in this extract; confirm against the full file.
 */
1787 static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
1788 char *opt, char **buf, size_t *blen)
1792 if (*blen > strlen(opt) + 1) {
1793 /* Could result in truncated output string. */
1794 len = snprintf(*buf, *blen, "%s=%d", opt, optval);
1803 * Load the option flags and values into the buffer.
/*
 * Produce a textual list of the options in effect for the mount `nmp'.
 *
 * buffer, buflen - destination and its size.
 *
 * Flag-style options go through nfscl_printopt() and numeric ones
 * through nfscl_printoptval().  The protocol version name ("nfsv4",
 * "nfsv3" or "nfsv2") has no leading comma; every other option string
 * starts with ','.  ",minorversion" and ",pnfs" are only considered for
 * NFSv4 mounts.  Complementary pairs (tcp/udp, hard/soft, cto/nocto) are
 * tested with opposite conditions so exactly one of each pair is
 * selected, while lockd/nolockd are only selected when NFSMNT_NFSV4 is
 * clear.  The sec= value is derived from the NFSMNT_KERB,
 * NFSMNT_INTEGRITY and NFSMNT_PRIVACY bits of nm_flag.
 * NOTE(review): the setup of the local buf/blen cursor is not visible in
 * this extract; presumably they start out as buffer and buflen.
 */
1805 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1812 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1814 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1815 nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1817 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1820 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1822 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1823 "nfsv2", &buf, &blen);
1824 nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1825 nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1826 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1828 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1830 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1832 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1834 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1836 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1838 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1840 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1841 ",noncontigwr", &buf, &blen);
1842 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1843 0, ",lockd", &buf, &blen);
1844 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1845 NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1846 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1848 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1850 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1851 NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1852 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1853 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1855 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1856 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1858 nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1859 nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1860 nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1861 nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1862 nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1863 nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1865 nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1866 nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1867 nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1869 nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1870 nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1872 nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1873 nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);