2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
47 #include <sys/clock.h>
49 #include <sys/limits.h>
51 #include <sys/malloc.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
64 #include <vm/vm_extern.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
/*
 * Client-wide declarations: the FEATURE() tag for this client, variables
 * that are defined in other source files (hence extern), and the two
 * malloc types used for request headers and for mount structures.
 */
77 FEATURE(nfscl, "NFSv4 client");
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern struct nfsstats newnfsstats;
82 extern int nfsrv_useacl;
83 extern int nfscl_debuglevel;
/* Per-iod-slot state and mount arrays, both sized by NFS_MAXASYNCDAEMON. */
84 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86 extern struct mtx ncl_iod_mutex;
89 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
90 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
/*
 * Read/write tunables exported under the _vfs_nfs sysctl node.
 *
 *  nfs_ip_paranoia    - default 1; nfs_mount() adds NFSMNT_NOCONN to the
 *                       mount flags when this is 0.
 *  downdelayinitial   - backed by nfs_tprintf_initial_delay.
 *  downdelayinterval  - backed by nfs_tprintf_delay.
 *  debug              - backed by nfs_debug.
 *
 * NOTE(review): downdelayinitial and downdelayinterval pass their default
 * delay constants (NFS_TPRINTF_INITIAL_DELAY, NFS_TPRINTF_DELAY) in the
 * argument position where the other entries pass OID_AUTO.  That looks
 * unintended, but replacing them would renumber the OIDs; confirm that no
 * consumer addresses them by number before changing it.
 */
92 SYSCTL_DECL(_vfs_nfs);
93 static int nfs_ip_paranoia = 1;
94 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95 &nfs_ip_paranoia, 0, "");
96 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99 /* how long between console messages "nfs server foo not responding" */
100 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
105 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
106 "Toggle debug flag");
/*
 * Forward declarations.  The first group are file-local helpers; the
 * second group (vfs_*_t typed) are the VFS operation entry points that
 * are collected in the nfs_vfsops table.
 */
109 static int nfs_mountroot(struct mount *);
110 static void nfs_sec_name(char *, int *);
111 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
112 struct nfs_args *argp, const char *, struct ucred *,
114 static int mountnfs(struct nfs_args *, struct mount *,
115 struct sockaddr *, char *, u_char *, int, u_char *, int,
116 u_char *, int, struct vnode **, struct ucred *,
117 struct thread *, int, int, int);
118 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
119 struct sockaddr_storage *, int *, off_t *,
/* VFS operation entry points. */
121 static vfs_mount_t nfs_mount;
122 static vfs_cmount_t nfs_cmount;
123 static vfs_unmount_t nfs_unmount;
124 static vfs_root_t nfs_root;
125 static vfs_statfs_t nfs_statfs;
126 static vfs_sync_t nfs_sync;
127 static vfs_sysctl_t nfs_sysctl;
128 static vfs_purge_t nfs_purge;
/*
 * Operations vector for the NFS client and its registration.
 *
 * The table wires the VFS entry points to the nfs_* functions declared
 * above, with ncl_init/ncl_uninit as the init and uninit hooks.  VFS_SET
 * registers it under the name nfs with the VFCF_NETWORK and VFCF_SBDRY
 * flags.
 */
131 * nfs vfs operations.
133 static struct vfsops nfs_vfsops = {
134 .vfs_init = ncl_init,
135 .vfs_mount = nfs_mount,
136 .vfs_cmount = nfs_cmount,
137 .vfs_root = nfs_root,
138 .vfs_statfs = nfs_statfs,
139 .vfs_sync = nfs_sync,
140 .vfs_uninit = ncl_uninit,
141 .vfs_unmount = nfs_unmount,
142 .vfs_sysctl = nfs_sysctl,
143 .vfs_purge = nfs_purge,
145 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
147 /* So that loader and kldload(2) can find us, wherever we are.. */
/*
 * Module version 1, depending on version 1 of nfscommon, krpc, nfssvc
 * and nfslock.
 */
148 MODULE_VERSION(nfs, 1);
149 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
150 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
151 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
152 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
/*
 * Diskless-boot state.
 *
 * When NFS_ROOT is not defined, zero-initialised nfs_diskless and
 * nfsv3_diskless structures and nfs_diskless_valid (0) are defined here.
 * The sysctls that follow are read-only (CTLFLAG_RD) and expose
 * nfs_diskless_valid, nfsv3_diskless.root_hostnam and
 * nfsv3_diskless.root_saddr.
 * NOTE(review): the end of the conditional section is not shown here, so
 * confirm whether the sysctls are inside or outside of it.
 */
155 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
156 * can be shared by both NFS clients. It is declared here so that it
157 * will be defined for kernels built without NFS_ROOT, although it
158 * isn't used in that case.
160 #if !defined(NFS_ROOT)
161 struct nfs_diskless nfs_diskless = { { { 0 } } };
162 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
163 int nfs_diskless_valid = 0;
166 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
167 &nfs_diskless_valid, 0,
168 "Has the diskless struct been filled correctly");
170 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
171 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
173 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
174 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
175 "%Ssockaddr_in", "Diskless root nfs address");
/*
 * Prototypes for the diskless-mount helpers.  newnfsargs_ntoh() is the
 * only one with external linkage; the other three are file-local.
 */
178 void newnfsargs_ntoh(struct nfs_args *);
179 static int nfs_mountdiskless(char *,
180 struct sockaddr_in *, struct nfs_args *,
181 struct thread *, struct vnode **, struct mount *);
182 static void nfs_convert_diskless(void);
183 static void nfs_convert_oargs(struct nfs_args *args,
184 struct onfs_args *oargs);
/*
 * Clamp the read, write and readdir transfer sizes of a mount and derive
 * the buffer I/O size from them.
 *
 * nmp: mount whose nm_rsize, nm_wsize and nm_readdirsize are adjusted in
 *      place.  The computed I/O size is also stored in
 *      nmp->nm_mountp->mnt_stat.f_iosize.
 *
 * The I/O size is the larger of nm_rsize and nm_wsize, raised to at least
 * PAGE_SIZE.  nfs_statfs() assigns the result of this call to f_iosize,
 * so presumably that value is what is returned.
 */
187 newnfs_iosize(struct nfsmount *nmp)
191 /* First, set the upper limit for iosize */
/*
 * maxio depends on the protocol version and, for NFSv3, on whether the
 * socket type is SOCK_DGRAM.
 */
192 if (nmp->nm_flag & NFSMNT_NFSV4) {
193 maxio = NFS_MAXBSIZE;
194 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
195 if (nmp->nm_sotype == SOCK_DGRAM)
196 maxio = NFS_MAXDGRAMDATA;
198 maxio = NFS_MAXBSIZE;
200 maxio = NFS_V2MAXDATA;
/* A zero or oversized read size becomes maxio, then is capped at MAXBSIZE. */
202 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
203 nmp->nm_rsize = maxio;
204 if (nmp->nm_rsize > MAXBSIZE)
205 nmp->nm_rsize = MAXBSIZE;
/* Same for the readdir size, which additionally never exceeds nm_rsize. */
206 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
207 nmp->nm_readdirsize = maxio;
208 if (nmp->nm_readdirsize > nmp->nm_rsize)
209 nmp->nm_readdirsize = nmp->nm_rsize;
/* The write size gets the same treatment as the read size. */
210 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
211 nmp->nm_wsize = maxio;
212 if (nmp->nm_wsize > MAXBSIZE)
213 nmp->nm_wsize = MAXBSIZE;
216 * Calculate the size used for io buffers. Use the larger
217 * of the two sizes to minimise nfs requests but make sure
218 * that it is at least one VM page to avoid wasting buffer
221 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 iosize = imax(iosize, PAGE_SIZE);
223 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
/*
 * Build a current-format nfs_args from an old-format onfs_args.
 *
 * args:  destination; version is stamped with NFS_ARGSVERSION and the
 *        fields listed below are overwritten.  Fields of *args that are
 *        not assigned here are left as the caller had them.
 * oargs: source, read only.
 *
 * Pointer members (addr, fh, hostname) are copied as pointers; the data
 * they reference is not duplicated.
 */
228 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
231 args->version = NFS_ARGSVERSION;
232 args->addr = oargs->addr;
233 args->addrlen = oargs->addrlen;
234 args->sotype = oargs->sotype;
235 args->proto = oargs->proto;
236 args->fh = oargs->fh;
237 args->fhsize = oargs->fhsize;
238 args->flags = oargs->flags;
239 args->wsize = oargs->wsize;
240 args->rsize = oargs->rsize;
241 args->readdirsize = oargs->readdirsize;
242 args->timeo = oargs->timeo;
243 args->retrans = oargs->retrans;
244 args->readahead = oargs->readahead;
245 args->hostname = oargs->hostname;
/*
 * Populate the global nfsv3_diskless from the global nfs_diskless.
 *
 * Copies the interface configuration, the gateway, the root mount
 * arguments (through nfs_convert_oargs()), the root file handle, the
 * server address, the root and local host names and the root time, then
 * sets nfs_diskless_valid to 3.  nfs_mountroot() calls this when
 * nfs_diskless_valid is 1.
 */
249 nfs_convert_diskless(void)
252 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
253 sizeof(struct ifaliasreq));
254 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
255 sizeof(struct sockaddr_in));
256 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
/*
 * The file handle length depends on the NFSMNT_NFSV3 flag: NFSX_MYFH
 * bytes when it is set, NFSX_V2FH bytes in the other branch.
 */
257 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
258 nfsv3_diskless.root_fhsize = NFSX_MYFH;
259 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
261 nfsv3_diskless.root_fhsize = NFSX_V2FH;
262 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
264 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
265 sizeof(struct sockaddr_in));
266 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
267 nfsv3_diskless.root_time = nfs_diskless.root_time;
268 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
270 nfs_diskless_valid = 3;
/*
 * VFS statfs operation: fill *sbp with file system statistics obtained
 * from the server.
 *
 * mp:  mount point being queried.
 * sbp: statfs structure to fill in; it may be mp->mnt_stat itself, in
 *      which case the mount names are not copied.
 *
 * Steps visible here: busy the mount without sleeping (MBF_NOWAIT),
 * obtain the root nfsnode with an exclusive lock, fetch FSINFO for NFSv3
 * mounts that do not have it yet, issue the STATFS RPC, refresh the
 * attribute cache, then load the results into *sbp.
 */
277 nfs_statfs(struct mount *mp, struct statfs *sbp)
281 struct nfsmount *nmp = VFSTONFS(mp);
282 struct nfsvattr nfsva;
285 int error = 0, attrflag, gotfsinfo = 0, ret;
290 error = vfs_busy(mp, MBF_NOWAIT);
293 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
/* nm_mtx only protects the flag test; it is released before any RPC. */
299 mtx_lock(&nmp->nm_mtx);
300 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
301 mtx_unlock(&nmp->nm_mtx);
302 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
307 mtx_unlock(&nmp->nm_mtx);
309 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
312 NFSCL_DEBUG(2, "statfs=%d\n", error);
/* Attributes are fetched by file handle, without going through vp. */
314 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
315 td->td_ucred, td, &nfsva, NULL, NULL);
318 * Just set default values to get things going.
320 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
321 nfsva.na_vattr.va_type = VDIR;
322 nfsva.na_vattr.va_mode = 0777;
323 nfsva.na_vattr.va_nlink = 100;
324 nfsva.na_vattr.va_uid = (uid_t)0;
325 nfsva.na_vattr.va_gid = (gid_t)0;
326 nfsva.na_vattr.va_fileid = 2;
327 nfsva.na_vattr.va_gen = 1;
328 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
329 nfsva.na_vattr.va_size = 512 * 1024;
332 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
/* The fsinfo/sbinfo loaders and newnfs_iosize() all run under nm_mtx. */
334 mtx_lock(&nmp->nm_mtx);
335 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
336 nfscl_loadfsinfo(nmp, &fs);
337 nfscl_loadsbinfo(nmp, &sb, sbp);
338 sbp->f_iosize = newnfs_iosize(nmp);
339 mtx_unlock(&nmp->nm_mtx);
/* Mount names are only copied when the caller supplied its own statfs. */
340 if (sbp != &mp->mnt_stat) {
341 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
342 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
344 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
/* In the other branch, NFSv4 errors are passed through nfscl_maperr(). */
345 } else if (NFS_ISV4(vp)) {
346 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
/*
 * Issue an FSINFO RPC for vp and record the result in the mount.
 *
 * nmp:  mount that receives the fsinfo data (updated under nm_mtx).
 * vp:   vnode the RPC is issued against; also the vnode whose attribute
 *       cache is refreshed via nfscl_loadattrcache().
 * cred: credentials used for the RPC.
 *
 * NOTE(review): the conditions guarding the attribute-cache and fsinfo
 * updates, and the return statement, are not shown here.
 */
354 * nfs version 3 fsinfo rpc call
357 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
361 struct nfsvattr nfsva;
364 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
367 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
369 mtx_lock(&nmp->nm_mtx);
370 nfscl_loadfsinfo(nmp, &fs);
371 mtx_unlock(&nmp->nm_mtx);
/*
 * nfs_mountroot(mp): mount the root file system over NFS at boot.
 *
 * mp: mount point for the root file system, passed on to
 *     nfs_mountdiskless().
 *
 * Sequence: obtain the diskless parameters (bootpc_init() or
 * nfs_setup_diskless(), depending on kernel options), convert an
 * old-format structure when nfs_diskless_valid is 1, configure the boot
 * interface with SIOCAIFADDR, optionally apply the MTU from the
 * "boot.netif.mtu" environment variable, add a default route when a
 * gateway is supplied, mount the root, then set the host name of prison0
 * and initialise the time of day from root_time.
 *
 * Failures of socreate(), SIOCAIFADDR and RTM_ADD panic; a failure to
 * set the MTU is only reported on the console.
 */
377 * Mount a remote root fs via NFS. This depends on the info in the
378 * nfs_diskless structure that has been filled in properly by some primary
380 * It goes something like this:
381 * - do enough of "ifconfig" by calling ifioctl() so that the system
382 * can talk to the server
383 * - If nfs_diskless.mygateway is filled in, use that address as
385 * - build the rootfs mount point and call mountnfs() to do the rest.
387 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
388 * structure, as well as other global NFS client variables here, as
389 * nfs_mountroot() will be called once in the boot before any other NFS
390 * client activity occurs.
393 nfs_mountroot(struct mount *mp)
395 struct thread *td = curthread;
396 struct nfsv3_diskless *nd = &nfsv3_diskless;
405 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
406 bootpc_init(); /* use bootp to get nfs_diskless filled in */
407 #elif defined(NFS_ROOT)
408 nfs_setup_diskless();
411 if (nfs_diskless_valid == 0)
/* A value of 1 means only the old-format structure was filled in. */
413 if (nfs_diskless_valid == 1)
414 nfs_convert_diskless();
417 * XXX splnet, so networks will receive...
422 * Do enough of ifconfig(8) so that the critical net interface can
423 * talk to the server.
425 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428 panic("nfs_mountroot: socreate(%04x): %d",
429 nd->myif.ifra_addr.sa_family, error);
431 #if 0 /* XXX Bad idea */
433 * We might not have been told the right interface, so we pass
434 * over the first ten interfaces of the same kind, until we get
435 * one of them configured.
438 for (i = strlen(nd->myif.ifra_name) - 1;
439 nd->myif.ifra_name[i] >= '0' &&
440 nd->myif.ifra_name[i] <= '9';
441 nd->myif.ifra_name[i] ++) {
442 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
447 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
/* Optional MTU override taken from the kernel environment. */
450 if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
451 ir.ifr_mtu = strtol(cp, NULL, 10);
452 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
454 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
/* Non-fatal: report it, newline-terminated so the next message is not glued on. */
456 printf("nfs_mountroot: SIOCSIFMTU: %d\n", error);
461 * If the gateway field is filled in, set it as the default route.
462 * Note that pxeboot will set a default route of 0 if the route
463 * is not set by the DHCP server. Check also for a value of 0
464 * to avoid panicking inappropriately in that situation.
466 if (nd->mygateway.sin_len != 0 &&
467 nd->mygateway.sin_addr.s_addr != 0) {
468 struct sockaddr_in mask, sin;
470 bzero((caddr_t)&mask, sizeof(mask));
472 sin.sin_family = AF_INET;
473 sin.sin_len = sizeof(sin);
474 /* XXX MRT use table 0 for this sort of thing */
475 CURVNET_SET(TD_TO_VNET(td));
476 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
477 (struct sockaddr *)&nd->mygateway,
478 (struct sockaddr *)&mask,
479 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
482 panic("nfs_mountroot: RTM_ADD: %d", error);
486 * Create the rootfs mount point.
488 nd->root_args.fh = nd->root_fh;
489 nd->root_args.fhsize = nd->root_fhsize;
/* Build "a.b.c.d:<root_hostnam>" from the server address for the mount. */
490 l = ntohl(nd->root_saddr.sin_addr.s_addr);
491 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
492 (l >> 24) & 0xff, (l >> 16) & 0xff,
493 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
494 printf("NFS ROOT: %s\n", buf);
495 nd->root_args.hostname = buf;
496 if ((error = nfs_mountdiskless(buf,
497 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
502 * This is not really an nfs issue, but it is much easier to
503 * set hostname here and then let the "/etc/rc.xxx" files
504 * mount the right /var based upon its preset value.
506 mtx_lock(&prison0.pr_mtx);
507 strlcpy(prison0.pr_hostname, nd->my_hostnam,
508 sizeof(prison0.pr_hostname));
509 mtx_unlock(&prison0.pr_mtx);
510 inittodr(ntohl(nd->root_time));
/*
 * nfs_mountdiskless: mount helper used by nfs_mountroot().
 *
 * path: "server:directory" string; the part after the first ':' is used
 *       as the directory path, and the whole string is passed to
 *       mountnfs() as the host argument.
 * sin:  server address; a copy made with sodupsockaddr(M_WAITOK) is
 *       handed to mountnfs().
 * args: mount arguments, passed through unchanged.
 * td:   calling thread; td->td_ucred supplies the credentials.
 * vpp:  passed through to mountnfs().
 * mp:   mount point being set up.
 *
 * mountnfs() is called without Kerberos names (NULL, 0), with
 * NFS_DEFAULT_NAMETIMEO and NFS_DEFAULT_NEGNAMETIMEO, and with 0 for its
 * final argument (the position nfs_mount() uses for the minor version).
 * A non-zero result is reported on the console.
 */
515 * Internal version of mount system call for diskless setup.
518 nfs_mountdiskless(char *path,
519 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
520 struct vnode **vpp, struct mount *mp)
522 struct sockaddr *nam;
527 * Find the directory path in "path", which also has the server's
528 * name/ip address in it.
530 dirpath = strchr(path, ':');
532 dirlen = strlen(++dirpath);
535 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
536 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
537 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
538 NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
539 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
/*
 * Translate the value of the "sec" mount option into mount flag bits.
 *
 * sec:    security flavour name.
 * flagsp: flag word the matching bits are OR-ed into; existing bits are
 *         never cleared.
 *
 *   "krb5"  -> NFSMNT_KERB
 *   "krb5i" -> NFSMNT_KERB | NFSMNT_INTEGRITY
 *   "krb5p" -> NFSMNT_KERB | NFSMNT_PRIVACY
 *
 * No other name is matched by the branches shown here.
 */
546 nfs_sec_name(char *sec, int *flagsp)
548 if (!strcmp(sec, "krb5"))
549 *flagsp |= NFSMNT_KERB;
550 else if (!strcmp(sec, "krb5i"))
551 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
552 else if (!strcmp(sec, "krb5p"))
553 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
/*
 * nfs_decode_args: apply the settings in *argp to a mount.
 *
 * mp:       mount point; its "ro" option and MNT_UPDATE flag decide
 *           whether MNT_RDONLY is set or cleared.
 * nmp:      nfsmount that receives the decoded values.
 * argp:     arguments to decode.  argp->flags is modified: RDIRPLUS is
 *           cleared when neither NFSMNT_NFSV3 nor NFSMNT_NFSV4 is set.
 * hostname: when not NULL, copied (bounded) into nmp->nm_hostname.
 * cred, td: caller credentials and thread.
 *
 * Processing, in order:
 *  - SOCK_STREAM mounts drop NFSMNT_NOCONN, start from NFS_MAXTIMEO and
 *    use a retry count of INT_MAX for NFSv4, NFS_RETRANS_TCP otherwise.
 *  - adjsock records whether the socket must be re-established: a newly
 *    requested reserved port, a change of NFSMNT_NOCONN, or a change of
 *    socket type or protocol.
 *  - argp->flags is OR-ed into nm_flag, so bits are only ever added here.
 *  - timeo is scaled as (timeo * NFS_HZ + 5) / 10 and clamped to
 *    NFS_MINTIMEO..NFS_MAXTIMEO; retrans is honoured only when > 1 and is
 *    capped at NFS_MAXREXMIT.
 *  - wsize and rsize above NFS_FABLKSIZE are rounded down to a power of
 *    two; the other branch uses NFS_FABLKSIZE.
 *  - The four attribute-cache timeouts take the supplied value when the
 *    matching flag is set and the value is >= 0, else the NFS_*ATTRTIMO
 *    default; each minimum is then lowered to its maximum if larger.
 *  - readahead is capped at NFS_MAXRAHEAD; wcommitsize is raised to
 *    nm_wsize when smaller.
 *  - With an existing nm_client and adjsock set, the connection is
 *    dropped and, for SOCK_DGRAM, newnfs_connect() is retried in a loop
 *    with nfs_catnap() between attempts.
 */
557 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
558 const char *hostname, struct ucred *cred, struct thread *td)
567 * Set read-only flag if requested; otherwise, clear it if this is
568 * an update. If this is not an update, then either the read-only
569 * flag is already clear, or this is a root mount and it was set
570 * intentionally at some previous point.
572 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
574 mp->mnt_flag |= MNT_RDONLY;
576 } else if (mp->mnt_flag & MNT_UPDATE) {
578 mp->mnt_flag &= ~MNT_RDONLY;
583 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
584 * no sense in that context. Also, set up appropriate retransmit
585 * and soft timeout behavior.
587 if (argp->sotype == SOCK_STREAM) {
588 nmp->nm_flag &= ~NFSMNT_NOCONN;
589 nmp->nm_timeo = NFS_MAXTIMEO;
590 if ((argp->flags & NFSMNT_NFSV4) != 0)
591 nmp->nm_retry = INT_MAX;
593 nmp->nm_retry = NFS_RETRANS_TCP;
596 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
597 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
598 argp->flags &= ~NFSMNT_RDIRPLUS;
599 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
602 /* Re-bind if rsrvd port requested and wasn't on one */
603 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
604 && (argp->flags & NFSMNT_RESVPORT);
605 /* Also re-bind if we're switching to/from a connected UDP socket */
606 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
607 (argp->flags & NFSMNT_NOCONN));
609 /* Update flags atomically. Don't change the lock bits. */
610 nmp->nm_flag = argp->flags | nmp->nm_flag;
613 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
614 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
615 if (nmp->nm_timeo < NFS_MINTIMEO)
616 nmp->nm_timeo = NFS_MINTIMEO;
617 else if (nmp->nm_timeo > NFS_MAXTIMEO)
618 nmp->nm_timeo = NFS_MAXTIMEO;
621 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
622 nmp->nm_retry = argp->retrans;
623 if (nmp->nm_retry > NFS_MAXREXMIT)
624 nmp->nm_retry = NFS_MAXREXMIT;
627 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
628 nmp->nm_wsize = argp->wsize;
630 * Clip at the power of 2 below the size. There is an
631 * issue (not isolated) that causes intermittent page
632 * faults if this is not done.
634 if (nmp->nm_wsize > NFS_FABLKSIZE)
635 nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
637 nmp->nm_wsize = NFS_FABLKSIZE;
640 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
641 nmp->nm_rsize = argp->rsize;
643 * Clip at the power of 2 below the size. There is an
644 * issue (not isolated) that causes intermittent page
645 * faults if this is not done.
647 if (nmp->nm_rsize > NFS_FABLKSIZE)
648 nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
650 nmp->nm_rsize = NFS_FABLKSIZE;
653 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
654 nmp->nm_readdirsize = argp->readdirsize;
657 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
658 nmp->nm_acregmin = argp->acregmin;
660 nmp->nm_acregmin = NFS_MINATTRTIMO;
661 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
662 nmp->nm_acregmax = argp->acregmax;
664 nmp->nm_acregmax = NFS_MAXATTRTIMO;
665 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
666 nmp->nm_acdirmin = argp->acdirmin;
668 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
669 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
670 nmp->nm_acdirmax = argp->acdirmax;
672 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
673 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
674 nmp->nm_acdirmin = nmp->nm_acdirmax;
675 if (nmp->nm_acregmin > nmp->nm_acregmax)
676 nmp->nm_acregmin = nmp->nm_acregmax;
678 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679 if (argp->readahead <= NFS_MAXRAHEAD)
680 nmp->nm_readahead = argp->readahead;
682 nmp->nm_readahead = NFS_MAXRAHEAD;
684 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685 if (argp->wcommitsize < nmp->nm_wsize)
686 nmp->nm_wcommitsize = nmp->nm_wsize;
688 nmp->nm_wcommitsize = argp->wcommitsize;
691 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
692 (nmp->nm_soproto != argp->proto));
694 if (nmp->nm_client != NULL && adjsock) {
695 int haslock = 0, error = 0;
697 if (nmp->nm_sotype == SOCK_STREAM) {
698 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
703 newnfs_disconnect(&nmp->nm_sockreq);
705 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
706 nmp->nm_sotype = argp->sotype;
707 nmp->nm_soproto = argp->proto;
708 if (nmp->nm_sotype == SOCK_DGRAM)
709 while (newnfs_connect(nmp, &nmp->nm_sockreq,
711 printf("newnfs_args: retrying connect\n");
712 (void) nfs_catnap(PSOCK, 0, "nfscon");
716 nmp->nm_sotype = argp->sotype;
717 nmp->nm_soproto = argp->proto;
720 if (hostname != NULL) {
721 strlcpy(nmp->nm_hostname, hostname,
722 sizeof(nmp->nm_hostname));
723 p = strchr(nmp->nm_hostname, ':');
/*
 * Names of the mount options this file system accepts.  nfs_mount()
 * passes this table to vfs_filteropt() to validate mp->mnt_optnew.
 */
729 static const char *nfs_opts[] = { "from", "nfs_args",
730 "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
731 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
732 "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
733 "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
734 "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
735 "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
736 "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
737 "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
738 "pnfs", "wcommitsize",
/*
 * nfs_mount(mp): VFS mount operation for the NFS client.
 *
 * mp: mount point; options are read from mp->mnt_optnew.
 *
 * Outline:
 *  1. Check the supplied options against nfs_opts (vfs_filteropt()).
 *  2. A root mount that is not an update is handed to nfs_mountroot().
 *  3. Start from the built-in defaults in args.  An "nfs_args" option
 *     (old mount_nfs) overwrites them and must carry NFS_ARGSVERSION,
 *     otherwise the error is EPROGMISMATCH.
 *  4. Apply the individual string options on top.
 *  5. For MNT_UPDATE, recompute args.flags so that the bits that may not
 *     change on update are handled separately, then re-run
 *     nfs_decode_args().
 *  6. Otherwise gather the file handle, host name, principal names,
 *     directory path and server address and call mountnfs().
 *
 * Numeric options are parsed with sscanf("%d"); a parse failure or an
 * out-of-range value is reported through vfs_mount_error().
 */
745 * It seems a bit dumb to copyinstr() the host and path here and then
746 * bcopy() them in mountnfs(), but I wanted to detect errors before
747 * doing the sockargs() call because sockargs() allocates an mbuf and
748 * an error after that means that I have to release the mbuf.
752 nfs_mount(struct mount *mp)
754 struct nfs_args args = {
755 .version = NFS_ARGSVERSION,
757 .addrlen = sizeof (struct sockaddr_in),
758 .sotype = SOCK_STREAM,
762 .flags = NFSMNT_RESVPORT,
765 .readdirsize = NFS_READDIRSIZE,
767 .retrans = NFS_RETRANS,
768 .readahead = NFS_DEFRAHEAD,
769 .wcommitsize = 0, /* was: NQ_DEFLEASE */
771 .acregmin = NFS_MINATTRTIMO,
772 .acregmax = NFS_MAXATTRTIMO,
773 .acdirmin = NFS_MINDIRATTRTIMO,
774 .acdirmax = NFS_MAXDIRATTRTIMO,
776 int error = 0, ret, len;
777 struct sockaddr *nam = NULL;
781 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
782 char *opt, *name, *secname;
783 int nametimeo = NFS_DEFAULT_NAMETIMEO;
784 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
786 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
789 has_nfs_args_opt = 0;
790 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
796 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
797 error = nfs_mountroot(mp);
804 * The old mount_nfs program passed the struct nfs_args
805 * from userspace to kernel. The new mount_nfs program
806 * passes string options via nmount() from userspace to kernel
807 * and we populate the struct nfs_args in the kernel.
809 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
810 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
815 if (args.version != NFS_ARGSVERSION) {
816 error = EPROGMISMATCH;
819 has_nfs_args_opt = 1;
822 /* Handle the new style options. */
823 if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
824 args.acdirmin = args.acdirmax =
825 args.acregmin = args.acregmax = 0;
826 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
827 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
829 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
830 args.flags |= NFSMNT_NOCONN;
831 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
832 args.flags &= ~NFSMNT_NOCONN;
833 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
834 args.flags |= NFSMNT_NOLOCKD;
835 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
836 args.flags &= ~NFSMNT_NOLOCKD;
837 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
838 args.flags |= NFSMNT_INT;
839 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
840 args.flags |= NFSMNT_RDIRPLUS;
841 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
842 args.flags |= NFSMNT_RESVPORT;
843 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
844 args.flags &= ~NFSMNT_RESVPORT;
845 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
846 args.flags |= NFSMNT_SOFT;
847 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
848 args.flags &= ~NFSMNT_SOFT;
849 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
850 args.sotype = SOCK_DGRAM;
851 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
852 args.sotype = SOCK_DGRAM;
853 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
854 args.sotype = SOCK_STREAM;
855 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
856 args.flags |= NFSMNT_NFSV3;
/* NFSv4 forces a stream socket regardless of any udp/mntudp option. */
857 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
858 args.flags |= NFSMNT_NFSV4;
859 args.sotype = SOCK_STREAM;
861 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
862 args.flags |= NFSMNT_ALLGSSNAME;
863 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
864 args.flags |= NFSMNT_NOCTO;
865 if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
866 args.flags |= NFSMNT_NONCONTIGWR;
867 if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
868 args.flags |= NFSMNT_PNFS;
869 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
871 vfs_mount_error(mp, "illegal readdirsize");
875 ret = sscanf(opt, "%d", &args.readdirsize);
876 if (ret != 1 || args.readdirsize <= 0) {
877 vfs_mount_error(mp, "illegal readdirsize: %s",
882 args.flags |= NFSMNT_READDIRSIZE;
884 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
886 vfs_mount_error(mp, "illegal readahead");
890 ret = sscanf(opt, "%d", &args.readahead);
/*
 * NOTE(review): readahead=0 is rejected here although nfs_decode_args()
 * accepts readahead >= 0 -- confirm that this is intended.
 */
891 if (ret != 1 || args.readahead <= 0) {
892 vfs_mount_error(mp, "illegal readahead: %s",
897 args.flags |= NFSMNT_READAHEAD;
899 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
901 vfs_mount_error(mp, "illegal wsize");
905 ret = sscanf(opt, "%d", &args.wsize);
906 if (ret != 1 || args.wsize <= 0) {
907 vfs_mount_error(mp, "illegal wsize: %s",
912 args.flags |= NFSMNT_WSIZE;
914 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
916 vfs_mount_error(mp, "illegal rsize");
920 ret = sscanf(opt, "%d", &args.rsize);
921 if (ret != 1 || args.rsize <= 0) {
922 vfs_mount_error(mp, "illegal rsize: %s",
927 args.flags |= NFSMNT_RSIZE;
929 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
931 vfs_mount_error(mp, "illegal retrans");
935 ret = sscanf(opt, "%d", &args.retrans);
936 if (ret != 1 || args.retrans <= 0) {
937 vfs_mount_error(mp, "illegal retrans: %s",
942 args.flags |= NFSMNT_RETRANS;
/* actimeo sets all four attribute-cache timeouts to the same value. */
944 if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
945 ret = sscanf(opt, "%d", &args.acregmin);
946 if (ret != 1 || args.acregmin < 0) {
947 vfs_mount_error(mp, "illegal actimeo: %s",
952 args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
953 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
954 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
956 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
957 ret = sscanf(opt, "%d", &args.acregmin);
958 if (ret != 1 || args.acregmin < 0) {
959 vfs_mount_error(mp, "illegal acregmin: %s",
964 args.flags |= NFSMNT_ACREGMIN;
966 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
967 ret = sscanf(opt, "%d", &args.acregmax);
968 if (ret != 1 || args.acregmax < 0) {
969 vfs_mount_error(mp, "illegal acregmax: %s",
974 args.flags |= NFSMNT_ACREGMAX;
976 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
977 ret = sscanf(opt, "%d", &args.acdirmin);
978 if (ret != 1 || args.acdirmin < 0) {
979 vfs_mount_error(mp, "illegal acdirmin: %s",
984 args.flags |= NFSMNT_ACDIRMIN;
986 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
987 ret = sscanf(opt, "%d", &args.acdirmax);
988 if (ret != 1 || args.acdirmax < 0) {
989 vfs_mount_error(mp, "illegal acdirmax: %s",
994 args.flags |= NFSMNT_ACDIRMAX;
996 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
997 ret = sscanf(opt, "%d", &args.wcommitsize);
998 if (ret != 1 || args.wcommitsize < 0) {
999 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1003 args.flags |= NFSMNT_WCOMMITSIZE;
/* "timeo" and "timeout" are synonyms; both store into args.timeo. */
1005 if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1006 ret = sscanf(opt, "%d", &args.timeo);
1007 if (ret != 1 || args.timeo <= 0) {
1008 vfs_mount_error(mp, "illegal timeo: %s",
1013 args.flags |= NFSMNT_TIMEO;
1015 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1016 ret = sscanf(opt, "%d", &args.timeo);
1017 if (ret != 1 || args.timeo <= 0) {
1018 vfs_mount_error(mp, "illegal timeout: %s",
1023 args.flags |= NFSMNT_TIMEO;
1025 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1026 ret = sscanf(opt, "%d", &nametimeo);
1027 if (ret != 1 || nametimeo < 0) {
1028 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1033 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1035 ret = sscanf(opt, "%d", &negnametimeo);
1036 if (ret != 1 || negnametimeo < 0) {
1037 vfs_mount_error(mp, "illegal negnametimeo: %s",
/* minorversion is only valid (0 or 1) together with the nfsv4 option. */
1043 if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1045 ret = sscanf(opt, "%d", &minvers);
1046 if (ret != 1 || minvers < 0 || minvers > 1 ||
1047 (args.flags & NFSMNT_NFSV4) == 0) {
1048 vfs_mount_error(mp, "illegal minorversion: %s", opt);
1053 if (vfs_getopt(mp->mnt_optnew, "sec",
1054 (void **) &secname, NULL) == 0)
1055 nfs_sec_name(secname, &args.flags);
1057 if (mp->mnt_flag & MNT_UPDATE) {
1058 struct nfsmount *nmp = VFSTONFS(mp);
1066 * If a change from TCP->UDP is done and there are thread(s)
1067 * that have I/O RPC(s) in progress with a transfer size
1068 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1069 * hung, retrying the RPC(s) forever. Usually these threads
1070 * will be seen doing an uninterruptible sleep on wait channel
1073 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1074 tprintf(td->td_proc, LOG_WARNING,
1075 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1078 * When doing an update, we can't change version,
1079 * security, switch lockd strategies or change cookie
1082 args.flags = (args.flags &
1088 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1095 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1096 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1101 * Make the nfs_ip_paranoia sysctl serve as the default connection
1102 * or no-connection mode for those protocols that support
1103 * no-connection mode (the flag will be cleared later for protocols
1104 * that do not support no-connection mode). This will allow a client
1105 * to receive replies from a different IP than the request was
1106 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1109 if (nfs_ip_paranoia == 0)
1110 args.flags |= NFSMNT_NOCONN;
1112 if (has_nfs_args_opt != 0) {
1114 * In the 'nfs_args' case, the pointers in the args
1115 * structure are in userland - we copy them in here.
1117 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1118 vfs_mount_error(mp, "Bad file handle");
1122 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1126 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1129 bzero(&hst[hstlen], MNAMELEN - hstlen);
1130 args.hostname = hst;
1131 /* sockargs() call must be after above copyin() calls */
1132 error = getsockaddr(&nam, (caddr_t)args.addr,
1137 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1138 &args.fhsize) == 0) {
1139 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1140 vfs_mount_error(mp, "Bad file handle");
1144 bcopy(args.fh, nfh, args.fhsize);
1148 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1149 (void **)&args.hostname, &len);
1150 if (args.hostname == NULL) {
1151 vfs_mount_error(mp, "Invalid hostname");
/*
 * NOTE(review): this copies MNAMELEN bytes regardless of the option
 * length returned in len; verify that the option buffer is always at
 * least that large, or bound the copy by len.
 */
1155 bcopy(args.hostname, hst, MNAMELEN);
1156 hst[MNAMELEN - 1] = '\0';
/* Server principal: explicit option, otherwise "nfs@<host>". */
1159 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1160 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1162 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1163 srvkrbnamelen = strlen(srvkrbname);
1165 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1166 strlcpy(krbname, name, sizeof (krbname));
1169 krbnamelen = strlen(krbname);
1171 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1172 strlcpy(dirpath, name, sizeof (dirpath));
1175 dirlen = strlen(dirpath);
/* Without nfs_args the server address comes from the "addr" option. */
1177 if (has_nfs_args_opt == 0) {
1178 if (vfs_getopt(mp->mnt_optnew, "addr",
1179 (void **)&args.addr, &args.addrlen) == 0) {
1180 if (args.addrlen > SOCK_MAXADDRLEN) {
1181 error = ENAMETOOLONG;
1184 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1185 bcopy(args.addr, nam, args.addrlen);
1186 nam->sa_len = args.addrlen;
1188 vfs_mount_error(mp, "No server address");
1195 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1196 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1197 nametimeo, negnametimeo, minvers);
1201 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1213 * It seems a bit dumb to copyinstr() the host and path here and then
1214 * bcopy() them in mountnfs(), but I wanted to detect errors before
1215 * doing the sockargs() call because sockargs() allocates an mbuf and
1216 * an error after that means that I have to release the mbuf.
/*
 * nfs_cmount: copies a struct nfs_args in from data, attaches the copy to
 * the mount argument list ma under the option name nfs_args, and passes the
 * list to kernel_mount() together with flags.
 * NOTE(review): the return type, local declarations and error checks are
 * not visible in this listing.
 */
1220 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1223 struct nfs_args args;
/* Copy the whole nfs_args structure in from data. */
1225 error = copyin(data, &args, sizeof (struct nfs_args));
/* Attach the copied structure to the mount argument list. */
1229 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1231 error = kernel_mount(ma, flags);
1236 * Common code for mount and mountroot
/*
 * mountnfs: builds the struct nfsmount for a new mount and brings it up.
 *
 * Visible steps, in order:
 *  - a mount with MNT_UPDATE set is logged as unsupported and nam is freed;
 *  - the nfsmount is allocated zeroed, with krbnamelen + dirlen +
 *    srvkrbnamelen + 2 extra bytes for the three names;
 *  - krbname, dirpath and srvkrbname are copied in and NUL terminated;
 *  - defaults are set and then nfs_decode_args() is called with argp;
 *  - the file handle from argp and the host name hst are recorded;
 *  - newnfs_connect() is called, and for nm_minorvers > 0 a client
 *    structure is obtained via nfscl_getcl();
 *  - for NFSv4 with no file handle and a non-empty dirpath,
 *    nfsrpc_getdirpath() is retried while it fails and trycnt remains;
 *  - with a file handle present the node is fetched via ncl_nget(),
 *    attributes are fetched (or defaulted) and loaded, ncl_fsinfo() is
 *    called for NFSv3, and *vpp is unlocked while its reference is kept;
 *  - the final lines tear down the socket request, credential, auth handle,
 *    mutexes, client structure, sessions, nmp and nam.
 *
 * NOTE(review): the last line of the parameter list, several declarations,
 * the else branches, the gotos and labels and the return statements are not
 * visible in this listing; confirm control flow against the full file.
 */
1239 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1240 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1241 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1242 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1245 struct nfsmount *nmp;
1247 int error, trycnt, ret;
1248 struct nfsvattr nfsva;
1249 struct nfsclclient *clp;
1250 struct nfsclds *dsp, *tdsp;
1252 static u_int64_t clval = 0;
1254 NFSCL_DEBUG(3, "in mnt\n");
/* Update requests are not handled here: log the fact and release nam. */
1256 if (mp->mnt_flag & MNT_UPDATE) {
1258 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1259 FREE(nam, M_SONAME);
/* Allocate the zeroed nfsmount with room for the three names after it. */
1262 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1263 krbnamelen + dirlen + srvkrbnamelen + 2,
1264 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1265 TAILQ_INIT(&nmp->nm_bufq);
/* clval is static, so successive mounts receive successive values. */
1267 clval = (u_int64_t)nfsboottime.tv_sec;
1268 nmp->nm_clval = clval++;
1269 nmp->nm_krbnamelen = krbnamelen;
1270 nmp->nm_dirpathlen = dirlen;
1271 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1272 if (td->td_ucred->cr_uid != (uid_t)0) {
1274 * nm_uid is used to get KerberosV credentials for
1275 * the nfsv4 state handling operations if there is
1276 * no host based principal set. Use the uid of
1277 * this user if not root, since they are doing the
1278 * mount. I don't think setting this for root will
1279 * work, since root normally does not have user
1280 * credentials in a credentials cache.
1282 nmp->nm_uid = td->td_ucred->cr_uid;
1285 * Just set to -1, so it won't be used.
1287 nmp->nm_uid = (uid_t)-1;
1290 /* Copy and null terminate all the names */
1291 if (nmp->nm_krbnamelen > 0) {
1292 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1293 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1295 if (nmp->nm_dirpathlen > 0) {
1296 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1297 nmp->nm_dirpathlen);
1298 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1301 if (nmp->nm_srvkrbnamelen > 0) {
1302 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1303 nmp->nm_srvkrbnamelen);
1304 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1305 + nmp->nm_srvkrbnamelen + 2] = '\0';
/* The socket request keeps its own reference on cred. */
1307 nmp->nm_sockreq.nr_cred = crhold(cred);
1308 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1310 nmp->nm_getinfo = nfs_getnlminfo;
1311 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1314 nmp->nm_mountp = mp;
1315 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1318 * Since nfs_decode_args() might optionally set them, these
1319 * need to be set to defaults before the call, so that the
1320 * optional settings aren't overwritten.
1322 nmp->nm_nametimeo = nametimeo;
1323 nmp->nm_negnametimeo = negnametimeo;
1324 nmp->nm_timeo = NFS_TIMEO;
1325 nmp->nm_retry = NFS_RETRANS;
1326 nmp->nm_readahead = NFS_DEFRAHEAD;
1328 /* This is empirical approximation of sqrt(hibufspace) * 256. */
1329 nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1330 while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1331 nmp->nm_wcommitsize *= 2;
1332 nmp->nm_wcommitsize *= 256;
/* The requested minor version is only recorded for NFSv4 mounts. */
1334 if ((argp->flags & NFSMNT_NFSV4) != 0)
1335 nmp->nm_minorvers = minvers;
1337 nmp->nm_minorvers = 0;
1339 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1342 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1343 * high, depending on whether we end up with negative offsets in
1344 * the client or server somewhere. 2GB-1 may be safer.
1346 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1347 * that we can handle until we find out otherwise.
1349 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1350 nmp->nm_maxfilesize = 0xffffffffLL;
1352 nmp->nm_maxfilesize = OFF_MAX;
/* With neither the V3 nor the V4 flag set, fixed transfer sizes are used. */
1354 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1355 nmp->nm_wsize = NFS_WSIZE;
1356 nmp->nm_rsize = NFS_RSIZE;
1357 nmp->nm_readdirsize = NFS_READDIRSIZE;
1359 nmp->nm_numgrps = NFS_MAXGRPS;
/* Negative tprintf delays are clamped to zero. */
1360 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1361 if (nmp->nm_tprintf_delay < 0)
1362 nmp->nm_tprintf_delay = 0;
1363 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1364 if (nmp->nm_tprintf_initial_delay < 0)
1365 nmp->nm_tprintf_initial_delay = 0;
/* Record the file handle supplied in argp, if there is one. */
1366 nmp->nm_fhsize = argp->fhsize;
1367 if (nmp->nm_fhsize > 0)
1368 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1369 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1371 /* Set up the sockets and per-host congestion */
1372 nmp->nm_sotype = argp->sotype;
1373 nmp->nm_soproto = argp->proto;
1374 nmp->nm_sockreq.nr_prog = NFS_PROG;
1375 if ((argp->flags & NFSMNT_NFSV4))
1376 nmp->nm_sockreq.nr_vers = NFS_VER4;
1377 else if ((argp->flags & NFSMNT_NFSV3))
1378 nmp->nm_sockreq.nr_vers = NFS_VER3;
1380 nmp->nm_sockreq.nr_vers = NFS_VER2;
/* Connect the socket request; a nonzero result is kept in error. */
1383 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1385 /* For NFSv4.1, get the clientid now. */
1386 if (nmp->nm_minorvers > 0) {
1387 NFSCL_DEBUG(3, "at getcl\n");
1388 error = nfscl_getcl(mp, cred, td, 0, &clp);
1389 NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1394 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1395 nmp->nm_dirpathlen > 0) {
1396 NFSCL_DEBUG(3, "in dirp\n");
1398 * If the fhsize on the mount point == 0 for V4, the mount
1399 * path needs to be looked up.
1403 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1405 NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1407 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1408 } while (error && --trycnt > 0);
1410 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1416 * A reference count is needed on the nfsnode representing the
1417 * remote root. If this object is not persistent, then backward
1418 * traversals of the mount point (i.e. "..") will not work if
1419 * the nfsnode gets flushed out of the cache. Ufs does not have
1420 * this problem, because one can identify root inodes by their
1421 * number == ROOTINO (2).
1423 if (nmp->nm_fhsize > 0) {
1425 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1426 * non-zero for the root vnode. f_iosize will be set correctly
1427 * by nfs_statfs() before any I/O occurs.
1429 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1430 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1437 * Get file attributes and transfer parameters for the
1438 * mountpoint. This has the side effect of filling in
1439 * (*vpp)->v_type with the correct value.
1441 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1442 cred, td, &nfsva, NULL, &lease);
1445 * Just set default values to get things going.
1447 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1448 nfsva.na_vattr.va_type = VDIR;
1449 nfsva.na_vattr.va_mode = 0777;
1450 nfsva.na_vattr.va_nlink = 100;
1451 nfsva.na_vattr.va_uid = (uid_t)0;
1452 nfsva.na_vattr.va_gid = (gid_t)0;
1453 nfsva.na_vattr.va_fileid = 2;
1454 nfsva.na_vattr.va_gen = 1;
1455 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1456 nfsva.na_vattr.va_size = 512 * 1024;
/* Load nfsva into the attribute cache through vpp. */
1459 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1460 if (nmp->nm_minorvers > 0) {
1461 NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1463 clp->nfsc_renew = NFSCL_RENEW(lease);
1464 clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
/* Bump the revision a second time if it wrapped, so it is never left at 0. */
1465 clp->nfsc_clientidrev++;
1466 if (clp->nfsc_clientidrev == 0)
1467 clp->nfsc_clientidrev++;
1470 * Mount will succeed, so the renew thread can be
1473 nfscl_start_renewthread(clp);
1474 nfscl_clientrelease(clp);
1476 if (argp->flags & NFSMNT_NFSV3)
1477 ncl_fsinfo(nmp, *vpp, cred, td);
1479 /* Mark if the mount point supports NFSv4 ACLs. */
1480 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1482 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1484 mp->mnt_flag |= MNT_NFS4ACLS;
1489 * Lose the lock but keep the ref.
1491 NFSVOPUNLOCK(*vpp, 0);
/*
 * Teardown of everything set up above.
 * NOTE(review): presumably the error exit; the label and the guarding
 * conditions are not visible in this listing.
 */
1498 nfscl_clientrelease(clp);
1499 newnfs_disconnect(&nmp->nm_sockreq);
1500 crfree(nmp->nm_sockreq.nr_cred);
1501 if (nmp->nm_sockreq.nr_auth != NULL)
1502 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1503 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1504 mtx_destroy(&nmp->nm_mtx);
1505 if (nmp->nm_clp != NULL) {
1507 LIST_REMOVE(nmp->nm_clp, nfsc_list);
1509 free(nmp->nm_clp, M_NFSCLCLIENT);
1511 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1512 nfscl_freenfsclds(dsp);
1513 FREE(nmp, M_NEWNFSMNT);
1514 FREE(nam, M_SONAME);
1519 * unmount system call
/*
 * nfs_unmount: tears down the NFS mount mp.
 *
 * mntflags: when MNT_FORCE is set, FORCECLOSE is passed to vflush(),
 * outstanding requests are cancelled with newnfs_nmcancelreqs(),
 * nfscl_umount() is called before the flush, and a failing vflush() is
 * retried (with nfs_catnap() between attempts) until trycnt reaches 30.
 * Without MNT_FORCE, nfscl_umount() is called after the flush.
 *
 * Afterwards any nfsiod slots assigned to this mount are released and the
 * socket request, credential, address, auth handle, mutexes, sessions and
 * the nfsmount itself are freed.
 * NOTE(review): the assignments of td and nmp, the error exits and the
 * return statements are not visible in this listing.
 */
1522 nfs_unmount(struct mount *mp, int mntflags)
1525 struct nfsmount *nmp;
1526 int error, flags = 0, i, trycnt = 0;
1527 struct nfsclds *dsp, *tdsp;
1531 if (mntflags & MNT_FORCE)
1532 flags |= FORCECLOSE;
1535 * Goes something like this..
1536 * - Call vflush() to clear out vnodes for this filesystem
1537 * - Close the socket
1538 * - Free up the data structures
1540 /* In the forced case, cancel any outstanding requests. */
1541 if (mntflags & MNT_FORCE) {
1542 error = newnfs_nmcancelreqs(nmp);
1545 /* For a forced close, get rid of the renew thread now */
1546 nfscl_umount(nmp, td);
1548 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1550 error = vflush(mp, 1, flags, td);
1551 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1552 (void) nfs_catnap(PSOCK, error, "newndm");
1553 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1558 * We are now committed to the unmount.
1560 if ((mntflags & MNT_FORCE) == 0)
1561 nfscl_umount(nmp, td);
1562 /* Make sure no nfsiods are assigned to this mount. */
1563 mtx_lock(&ncl_iod_mutex);
1564 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1565 if (ncl_iodmount[i] == nmp) {
1566 ncl_iodwant[i] = NFSIOD_AVAILABLE;
1567 ncl_iodmount[i] = NULL;
1569 mtx_unlock(&ncl_iod_mutex);
/* Disconnect, then release the resources owned by the nfsmount. */
1570 newnfs_disconnect(&nmp->nm_sockreq);
1571 crfree(nmp->nm_sockreq.nr_cred);
1572 FREE(nmp->nm_nam, M_SONAME);
1573 if (nmp->nm_sockreq.nr_auth != NULL)
1574 AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1575 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1576 mtx_destroy(&nmp->nm_mtx);
1577 TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1578 nfscl_freenfsclds(dsp);
1579 FREE(nmp, M_NEWNFSMNT);
1585 * Return root of a filesystem
/*
 * nfs_root: looks up the node for the file handle stored in the nfsmount
 * (nm_fh, nm_fhsize) via ncl_nget(), passing flags through. For an NFSv3
 * mount that has not yet obtained fsinfo, ncl_fsinfo() is called with the
 * credentials of curthread. The vnode is then flagged VV_ROOT.
 * NOTE(review): how nmp and vp are assigned, the body of the VNON check and
 * the store into vpp are not visible in this listing.
 */
1588 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1591 struct nfsmount *nmp;
1596 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1601 * Get transfer parameters and attributes for root vnode once.
1603 mtx_lock(&nmp->nm_mtx);
1604 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
/* nm_mtx is dropped before ncl_fsinfo() is called. */
1605 mtx_unlock(&nmp->nm_mtx);
1606 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1608 mtx_unlock(&nmp->nm_mtx);
1609 if (vp->v_type == VNON)
1611 vp->v_vflag |= VV_ROOT;
1617 * Flush out the buffer cache
/*
 * nfs_sync: walks every vnode of mp and calls VOP_FSYNC() on it with the
 * given waitfor value. A check on MNTK_UNMOUNTF precedes the walk.
 * The int allerror is declared alongside error; how it is updated is not
 * visible here.
 * NOTE(review): the body of the unmount check, the body of the skip branch
 * inside the loop and the return statement are not visible in this listing.
 */
1621 nfs_sync(struct mount *mp, int waitfor)
1623 struct vnode *vp, *mvp;
1625 int error, allerror = 0;
1631 * If a forced dismount is in progress, return from here so that
1632 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1633 * calling VFS_UNMOUNT().
1635 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1642 * Force stale buffer cache information to be flushed.
1645 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1646 /* XXX Racy bv_cnt check. */
1647 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1648 waitfor == MNT_LAZY) {
/* If vget() fails the iteration is aborted via the ABORT macro. */
1652 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1653 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1656 error = VOP_FSYNC(vp, waitfor, td);
1659 NFSVOPUNLOCK(vp, 0);
/*
 * nfs_sysctl: per-mount sysctl handler for the nfsmount behind mp.
 *
 * Visible behaviour:
 *  - VFS_CTL_NOLOCKS: reports 1 or 0 for the NFSMNT_NOLOCKS flag through
 *    SYSCTL_OUT when req->oldptr is set; when req->newptr is set a new
 *    value is read with SYSCTL_IN and the flag is set or cleared;
 *  - a query path fills vq.vq_flags with VQ_NOTRESP when NFSSTA_TIMEO is
 *    set, and with VQ_NOTRESPLOCK when NFSMNT_NOLOCKS is clear and
 *    NFSSTA_LOCKTIMEO is set, then writes vq out;
 *  - a path that reads nm_tprintf_initial_delay out, and on write first
 *    calls vfs_suser(), reads the new value in and clamps a negative value
 *    to 0.
 * NOTE(review): the switch statement, the other case labels and the error
 * checks are not visible in this listing.
 */
1666 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1668 struct nfsmount *nmp = VFSTONFS(mp);
1672 bzero(&vq, sizeof(vq));
1675 case VFS_CTL_NOLOCKS:
1676 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1677 if (req->oldptr != NULL) {
1678 error = SYSCTL_OUT(req, &val, sizeof(val));
1682 if (req->newptr != NULL) {
1683 error = SYSCTL_IN(req, &val, sizeof(val));
1687 nmp->nm_flag |= NFSMNT_NOLOCKS;
1689 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
/* nm_state is sampled under nm_mtx for the NFSSTA_TIMEO check. */
1694 mtx_lock(&nmp->nm_mtx);
1695 if (nmp->nm_state & NFSSTA_TIMEO)
1696 vq.vq_flags |= VQ_NOTRESP;
1697 mtx_unlock(&nmp->nm_mtx);
1699 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1700 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1701 vq.vq_flags |= VQ_NOTRESPLOCK;
1703 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1706 if (req->oldptr != NULL) {
1707 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1708 sizeof(nmp->nm_tprintf_initial_delay));
1712 if (req->newptr != NULL) {
/* Writing the delay requires vfs_suser() to be called first. */
1713 error = vfs_suser(mp, req->td);
1716 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1717 sizeof(nmp->nm_tprintf_initial_delay));
1720 if (nmp->nm_tprintf_initial_delay < 0)
1721 nmp->nm_tprintf_initial_delay = 0;
1731 * Purge any RPCs in progress, so that they will all return errors.
1732 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
/*
 * nfs_purge: resolves the nfsmount for mp with VFSTONFS() and cancels its
 * requests through newnfs_nmcancelreqs(). The result of that call is not
 * used.
 */
1736 nfs_purge(struct mount *mp)
1738 struct nfsmount *nmp = VFSTONFS(mp);
1740 newnfs_nmcancelreqs(nmp);
1744 * Extract the information needed by the nlm from the nfs vnode.
/*
 * nfs_getnlminfo: copies lock manager related data out of the nfsnode of vp
 * and the nfsmount of its mount point.
 *
 *  fhlenp  receives the file handle length (nfh_len).
 *  fhp     receives nfh_len bytes of the file handle.
 *  sp      receives the address nm_nam, limited to sizeof(*sp) bytes.
 *  is_v3p  receives NFS_ISV3(vp).
 *  sizep   receives n_size.
 *  timeop  when not NULL, receives nm_timeo converted from NFS_HZ ticks to
 *          seconds and microseconds.
 * NOTE(review): the guards around the other output pointers are not visible
 * in this listing; only the timeop NULL check can be seen.
 */
1747 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1748 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1749 struct timeval *timeop)
1751 struct nfsmount *nmp;
1752 struct nfsnode *np = VTONFS(vp);
1754 nmp = VFSTONFS(vp->v_mount);
1756 *fhlenp = (size_t)np->n_fhp->nfh_len;
1758 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
/* The copy length is capped so that *sp cannot be overrun. */
1760 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1762 *is_v3p = NFS_ISV3(vp);
1764 *sizep = np->n_size;
1765 if (timeop != NULL) {
/* Whole ticks become seconds; the remainder is scaled to microseconds. */
1766 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1767 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1772 * This function prints out an option name, based on the conditional
/*
 * nfscl_printopt: when testval is nonzero and the remaining space *blen is
 * larger than strlen(opt), formats opt at *buf with snprintf() and compares
 * the returned length against strlen(opt).
 * nmp is not referenced in the visible lines.
 * NOTE(review): what happens on a length mismatch, and how *buf and *blen
 * are advanced, is not visible in this listing.
 */
1775 static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
1776 char *opt, char **buf, size_t *blen)
/* The strict comparison leaves room for the terminating NUL. */
1780 if (testval != 0 && *blen > strlen(opt)) {
1781 len = snprintf(*buf, *blen, "%s", opt);
1782 if (len != strlen(opt))
1790 * This function prints out an option's integer value.
/*
 * nfscl_printoptval: when the remaining space *blen is larger than
 * strlen(opt) + 1, formats opt, an equals sign and optval in decimal at
 * *buf with snprintf().
 * nmp is not referenced in the visible lines.
 * NOTE(review): how the snprintf() result is used to advance *buf and *blen
 * is not visible in this listing.
 */
1792 static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
1793 char *opt, char **buf, size_t *blen)
/* The size check covers the name and one more character, not the digits. */
1797 if (*blen > strlen(opt) + 1) {
1798 /* Could result in truncated output string. */
1799 len = snprintf(*buf, *blen, "%s=%d", opt, optval);
1808 * Load the option flags and values into the buffer.
/*
 * nfscl_retopts: reports the settings of nmp as option text by calling
 * nfscl_printopt() for flag style options and nfscl_printoptval() for
 * numeric ones, passing the addresses of buf and blen to each call.
 * The first option carries no leading comma; all later ones do.
 * NOTE(review): the declarations of buf and blen and their initialisation
 * from buffer and buflen are not visible in this listing.
 */
1810 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1817 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1819 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1820 nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1822 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1825 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1827 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1828 "nfsv2", &buf, &blen);
/* Exactly one of tcp or udp is selected, based on nm_sotype. */
1829 nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1830 nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1831 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1833 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1835 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1837 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1839 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1841 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1843 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1845 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1846 ",noncontigwr", &buf, &blen);
/* Both lockd tests require the NFSMNT_NFSV4 flag to be clear. */
1847 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1848 0, ",lockd", &buf, &blen);
1849 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1850 NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1851 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1853 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1855 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1856 NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
/* The krb5i and krb5p tests each require exactly one extra flag with KERB. */
1857 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1858 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1860 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1861 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1863 nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1864 nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1865 nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1866 nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1867 nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1868 nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1870 nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1871 nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1872 nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1874 nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1875 nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1877 nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1878 nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);