2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
43 #include "opt_inet6.h"
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/dirent.h>
49 #include <sys/kernel.h>
51 #include <sys/malloc.h>
53 #include <sys/mount.h>
54 #include <sys/mutex.h>
56 #include <sys/rmlock.h>
57 #include <sys/refcount.h>
58 #include <sys/signalvar.h>
59 #include <sys/socket.h>
60 #include <sys/vnode.h>
62 #include <netinet/in.h>
63 #include <net/radix.h>
65 #include <rpc/types.h>
/*
 * Malloc type for the per-address netcred allocations made by
 * vfs_hang_addrlist().
 */
68 static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
/*
 * Forward declarations of the file-local helpers defined further down.
 * The radix head constructor is only declared when INET or INET6 is
 * configured.
 * NOTE(review): the matching #endif is not visible in this view of the
 * file -- confirm against the full source.
 */
70 #if defined(INET) || defined(INET6)
71 static struct radix_node_head *vfs_create_addrlist_af(
72 struct radix_node_head **prnh, int off);
74 static int vfs_free_netcred(struct radix_node *rn, void *w);
75 static void vfs_free_addrlist_af(struct radix_node_head **prnh);
76 static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
77 struct export_args *argp);
78 static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *);
81 * Network address lookup element
/*
 * Members of the per-address export entry (struct netcred).
 * NOTE(review): the struct header and closing brace are not visible in
 * this view -- confirm against the full source.
 */
/*
 * Node storage handed to rnh_addaddr().  Other code in this file casts
 * radix_node pointers to struct netcred, which relies on this member
 * sitting at offset 0.
 */
84 struct radix_node netc_rnodes[2];
/* Export flags, copied from export_args.ex_flags. */
85 uint64_t netc_exflags;
/* Credential built with crget() from ex_uid and the export group list. */
86 struct ucred *netc_anon;
/* Number of entries of netc_secflavors that are in use. */
87 int netc_numsecflavors;
/* Security flavors, copied from export_args.ex_secflavors. */
88 int netc_secflavors[MAXSECFLAVORS];
92 * Network export information
/*
 * Members of the per-mount export information (struct netexport).
 * NOTE(review): the struct header and closing brace are not visible in
 * this view -- confirm against the full source.
 */
95 struct netcred ne_defexported; /* Default export */
/* Radix head keyed on sockaddr_in.sin_addr; NULL until first needed. */
96 struct radix_node_head *ne4;
/* Radix head keyed on sockaddr_in6.sin6_addr; NULL until first needed. */
97 struct radix_node_head *ne6;
101 * Build radix trees of net addresses and hang them off the mount point.
102 * Called by vfs_export() to set up the lists of export addresses.
/*
 * vfs_hang_addrlist: record the single export entry described by *argp
 * in *nep.
 *
 * mp   - mount being exported; gains MNT_DEFEXPORTED for a default export
 *        and is the target of the vfs_mount_error() messages.
 * nep  - export information holding the default entry and the per-family
 *        radix heads (ne4 / ne6).
 * argp - export arguments; ex_addr and ex_mask are fetched with copyin().
 *
 * The caller, vfs_export(), treats a nonzero return value as an error.
 *
 * NOTE(review): several short lines of this function (return type, braces,
 * some local declarations, error assignments and returns) are not visible
 * in this view; every visible line is kept as found except for the
 * vfs_mount_error() argument fix and the corrected XXX comment below.
 */
105 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
106 struct export_args *argp)
109 struct radix_node_head *rnh;
111 struct radix_node *rn;
112 struct sockaddr *saddr, *smask = NULL;
113 #if defined(INET6) || defined(INET)
/*
 * vfs_export() rejects counts outside 0..MAXSECFLAVORS-1 and substitutes a
 * single AUTH_SYS flavor for 0, so a count of at least 1 is expected here.
 */
118 KASSERT(argp->ex_numsecflavors > 0,
119 ("%s: numsecflavors <= 0", __func__));
120 KASSERT(argp->ex_numsecflavors < MAXSECFLAVORS,
121 ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
124 * XXX: This routine converts from a uid plus gid list
125 * to a `struct ucred' (np->netc_anon). This
126 * operation is questionable; for example, what should be done
127 * with fields like cr_uidinfo and cr_prison? Currently, this
128 * routine sets cr_prison to &prison0 and takes a hold on it.
/* No address supplied: this is the default export, stored inline in *nep. */
130 if (argp->ex_addrlen == 0) {
131 if (mp->mnt_flag & MNT_DEFEXPORTED) {
133 "MNT_DEFEXPORTED already set for mount %p", mp);
136 np = &nep->ne_defexported;
137 np->netc_exflags = argp->ex_flags;
138 np->netc_anon = crget();
139 np->netc_anon->cr_uid = argp->ex_uid;
140 crsetgroups(np->netc_anon, argp->ex_ngroups,
142 np->netc_anon->cr_prison = &prison0;
143 prison_hold(np->netc_anon->cr_prison);
144 np->netc_numsecflavors = argp->ex_numsecflavors;
145 bcopy(argp->ex_secflavors, np->netc_secflavors,
146 sizeof(np->netc_secflavors));
148 mp->mnt_flag |= MNT_DEFEXPORTED;
154 if (argp->ex_addrlen > MLEN) {
155 vfs_mount_error(mp, "ex_addrlen %d is greater than %d",
156 argp->ex_addrlen, MLEN);
/*
 * One zeroed allocation holds the netcred, followed by the address and
 * then the mask.
 */
161 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
162 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
163 saddr = (struct sockaddr *) (np + 1);
164 if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen)))
166 if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) {
/*
 * The format has a %d conversion, so the offending family must be passed;
 * it was previously missing, leaving the conversion without an argument.
 */
168 vfs_mount_error(mp, "Invalid saddr->sa_family: %d", saddr->sa_family);
/* Do not let sa_len claim more bytes than were copied in. */
171 if (saddr->sa_len > argp->ex_addrlen)
172 saddr->sa_len = argp->ex_addrlen;
173 if (argp->ex_masklen) {
174 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
175 error = copyin(argp->ex_mask, smask, argp->ex_masklen);
178 if (smask->sa_len > argp->ex_masklen)
179 smask->sa_len = argp->ex_masklen;
/* Pick the radix head for this address family, creating it on first use. */
182 switch (saddr->sa_family) {
185 if ((rnh = nep->ne4) == NULL) {
186 off = offsetof(struct sockaddr_in, sin_addr) << 3;
187 rnh = vfs_create_addrlist_af(&nep->ne4, off);
193 if ((rnh = nep->ne6) == NULL) {
194 off = offsetof(struct sockaddr_in6, sin6_addr) << 3;
195 rnh = vfs_create_addrlist_af(&nep->ne6, off);
202 vfs_mount_error(mp, "%s %s %d",
203 "Unable to initialize radix node head ",
204 "for address family", saddr->sa_family);
/* Insert under the head lock; np->netc_rnodes supplies the node storage. */
207 RADIX_NODE_HEAD_LOCK(rnh);
208 rn = (*rnh->rnh_addaddr)(saddr, smask, &rnh->rh, np->netc_rnodes);
209 RADIX_NODE_HEAD_UNLOCK(rnh);
210 if (rn == NULL || np != (struct netcred *)rn) { /* already exists */
213 "netcred already exists for given addr/mask");
/* Entry inserted: fill in flags, anonymous credential and flavors. */
216 np->netc_exflags = argp->ex_flags;
217 np->netc_anon = crget();
218 np->netc_anon->cr_uid = argp->ex_uid;
219 crsetgroups(np->netc_anon, argp->ex_ngroups,
221 np->netc_anon->cr_prison = &prison0;
222 prison_hold(np->netc_anon->cr_prison);
223 np->netc_numsecflavors = argp->ex_numsecflavors;
224 bcopy(argp->ex_secflavors, np->netc_secflavors,
225 sizeof(np->netc_secflavors));
232 /* Helper for vfs_free_addrlist. */
/*
 * vfs_free_netcred: tree-walk callback passed to rnh_walktree() by
 * vfs_free_addrlist_af().
 *
 * rn - radix node being visited; it is cast to the enclosing netcred.
 * w  - the radix_node_head that owns rn, passed as the walk argument.
 *
 * NOTE(review): the return type, the declaration of cred, the release of
 * the credential and node storage, and the return statement are not
 * visible in this view -- confirm against the full source.
 */
235 vfs_free_netcred(struct radix_node *rn, void *w)
237 struct radix_node_head *rnh = (struct radix_node_head *) w;
/* Unlink the node from the tree it was found in. */
240 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, &rnh->rh);
/* Fetch the anonymous credential attached to this export entry. */
241 cred = ((struct netcred *)rn)->netc_anon;
/*
 * vfs_create_addrlist_af: create the radix head stored at *prnh.
 *
 * prnh - location that receives the new head (nep->ne4 or nep->ne6 in
 *        vfs_hang_addrlist()).
 * off  - key offset handed to rn_inithead(); the callers pass the byte
 *        offset of the address field shifted left by 3.
 *
 * NOTE(review): the statements taken when rn_inithead() returns 0 and the
 * final return are not visible in this view; presumably NULL is returned
 * on failure and *prnh on success -- confirm against the full source.
 */
248 #if defined(INET) || defined(INET6)
249 static struct radix_node_head *
250 vfs_create_addrlist_af(struct radix_node_head **prnh, int off)
253 if (rn_inithead((void **)prnh, off) == 0)
/* The head was created; set up its lock before it is used. */
255 RADIX_NODE_HEAD_LOCK_INIT(*prnh);
/*
 * vfs_free_addrlist_af: tear down one per-family radix head.
 *
 * Visits every node with vfs_free_netcred() while holding the head lock,
 * then destroys the lock and detaches the head stored at *prnh.
 *
 * NOTE(review): the return type and the assignment that loads rnh are not
 * visible in this view; presumably rnh is taken from *prnh -- confirm.
 */
261 vfs_free_addrlist_af(struct radix_node_head **prnh)
263 struct radix_node_head *rnh;
266 RADIX_NODE_HEAD_LOCK(rnh);
/* The head itself is passed as the walk argument so the callback can delete from it. */
267 (*rnh->rnh_walktree)(&rnh->rh, vfs_free_netcred, rnh);
268 RADIX_NODE_HEAD_UNLOCK(rnh);
269 RADIX_NODE_HEAD_DESTROY(rnh);
270 rn_detachhead((void **)prnh);
275 * Free the net address radix trees that are hanging off the mount points.
/*
 * vfs_free_addrlist: release the address lists and the default export
 * credential pointer held in *nep.
 *
 * NOTE(review): the return type, the declaration of cred and the
 * statements between loading cred and clearing netc_anon are not visible
 * in this view -- confirm how cred is released against the full source.
 */
278 vfs_free_addrlist(struct netexport *nep)
/* Each per-family head exists only if an address of that family was added. */
282 if (nep->ne4 != NULL)
283 vfs_free_addrlist_af(&nep->ne4);
284 if (nep->ne6 != NULL)
285 vfs_free_addrlist_af(&nep->ne6);
/* The default export is embedded in *nep, so only its credential pointer is reset. */
287 cred = nep->ne_defexported.netc_anon;
290 nep->ne_defexported.netc_anon = NULL;
296 * High level function to manipulate export options on a mount point
297 * and the passed in netexport.
298 * Struct export_args *argp is the variable used to twiddle options,
299 * the structure is described in sys/mount.h
300 * The do_exjail argument should be true if *mp is in the mountlist
301 * and false if not. It is not in the mountlist for the NFSv4 rootfs
302 * fake mount point just used for exports.
/*
 * vfs_export: apply the export request in *argp to mount *mp.
 *
 * - With MNT_DELEXPORT in ex_flags the existing export state is torn down:
 *   the public file system registration, the address lists, mnt_export,
 *   mnt_exjail and the MNT_EXPORTED / MNT_DEFEXPORTED flags.
 * - With MNT_EXPORTED in ex_flags one entry is added via
 *   vfs_hang_addrlist(), after registering the public file system when
 *   MNT_EXPUBLIC is also set.
 * The export state is modified with mp->mnt_explock held exclusively.
 *
 * NOTE(review): many short lines (return type, braces, local declarations,
 * error assignments, gotos, returns and some conditions) are not visible
 * in this view, so the exact error codes and branch structure must be
 * confirmed against the full source.
 */
305 vfs_export(struct mount *mp, struct export_args *argp, bool do_exjail)
307 struct netexport *nep;
/* The request has to ask for a delete and/or an export. */
313 if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0)
/* An export request must carry a flavor count in 0..MAXSECFLAVORS-1. */
316 if ((argp->ex_flags & MNT_EXPORTED) != 0 &&
317 (argp->ex_numsecflavors < 0
318 || argp->ex_numsecflavors >= MAXSECFLAVORS))
/*
 * pr is the prison of the calling thread; it is compared below with the
 * prison of the credential recorded in mp->mnt_exjail.
 */
322 pr = curthread->td_ucred->cr_prison;
323 lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
324 nep = mp->mnt_export;
/* Delete path. */
325 if (argp->ex_flags & MNT_DELEXPORT) {
331 if (mp->mnt_exjail != NULL && mp->mnt_exjail->cr_prison != pr &&
334 /* EXDEV will not get logged by mountd(8). */
337 } else if (mp->mnt_exjail != NULL &&
338 mp->mnt_exjail->cr_prison != pr) {
340 /* EPERM will get logged by mountd(8). */
/* Drop the public file system registration before freeing the exports. */
345 if (mp->mnt_flag & MNT_EXPUBLIC) {
346 vfs_setpublicfs(NULL, NULL, NULL);
348 mp->mnt_flag &= ~MNT_EXPUBLIC;
351 vfs_free_addrlist(nep);
352 mp->mnt_export = NULL;
357 mp->mnt_exjail = NULL;
358 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
/* Balances the pr_exportcnt increment made where mnt_exjail is set below. */
361 atomic_subtract_int(&pr->pr_exportcnt, 1);
/* Export path. */
365 if (argp->ex_flags & MNT_EXPORTED) {
/*
 * mnt_exjail is NULL but a netexport exists: vfs_exjail_delete() can leave
 * exports in this state, so the old lists are freed and nep is reused
 * zeroed.
 */
368 if (mp->mnt_exjail == NULL) {
370 if (do_exjail && nep != NULL) {
371 vfs_free_addrlist(nep);
372 memset(nep, 0, sizeof(*nep));
375 } else if (mp->mnt_exjail->cr_prison != pr) {
382 nep = malloc(sizeof(struct netexport), M_MOUNT,
384 mp->mnt_export = nep;
387 if (argp->ex_flags & MNT_EXPUBLIC) {
388 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) {
390 mp->mnt_export = NULL;
/* Record the exporting credential once and count it against its prison. */
397 if (do_exjail && mp->mnt_exjail == NULL) {
398 mp->mnt_exjail = crhold(curthread->td_ucred);
399 atomic_add_int(&pr->pr_exportcnt, 1);
401 mp->mnt_flag |= MNT_EXPUBLIC;
/* Default to a single AUTH_SYS flavor when the caller supplied none. */
404 if (argp->ex_numsecflavors == 0) {
405 argp->ex_numsecflavors = 1;
406 argp->ex_secflavors[0] = AUTH_SYS;
408 if ((error = vfs_hang_addrlist(mp, nep, argp))) {
410 mp->mnt_export = NULL;
416 if (do_exjail && mp->mnt_exjail == NULL) {
417 mp->mnt_exjail = crhold(curthread->td_ucred);
418 atomic_add_int(&pr->pr_exportcnt, 1);
420 mp->mnt_flag |= MNT_EXPORTED;
425 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
427 * Once we have executed the vfs_export() command, we do
428 * not want to keep the "export" option around in the
429 * options list, since that will cause subsequent MNT_UPDATE
430 * calls to fail. The export information is saved in
431 * mp->mnt_export, so we can safely delete the "export" mount option
434 vfs_deleteopt(mp->mnt_optnew, "export");
435 vfs_deleteopt(mp->mnt_opt, "export");
440 * Get rid of credential references for this prison.
/*
 * vfs_exjail_delete: drop the export state that references prison *pr.
 *
 * Walks the mount list for mounts whose mnt_exjail credential belongs to
 * pr.  For each one mnt_exjail is cleared; on the path that takes
 * mnt_explock the export flags, the address lists and the netexport are
 * released as well.
 *
 * NOTE(review): the return type, local declarations, braces, the test of
 * the vfs_busy() result, the credential release and the loop bookkeeping
 * are not visible in this view -- confirm against the full source.
 */
443 vfs_exjail_delete(struct prison *pr)
450 * Since this function is called from prison_cleanup() after
451 * all processes in the prison have exited, the value of
452 * pr_exportcnt can no longer increase. It is possible for
453 * a dismount of a file system exported within this prison
454 * to be in progress. In this case, the file system is no
455 * longer in the mountlist and the mnt_exjail will be free'd
456 * by vfs_mount_destroy() at some time. As such, pr_exportcnt
457 * and, therefore "i", is the upper bound on the number of
458 * mnt_exjail entries to be found by this function.
460 i = atomic_load_int(&pr->pr_exportcnt);
461 KASSERT(i >= 0, ("vfs_exjail_delete: pr_exportcnt negative"));
/* The mount list is walked with mountlist_mtx held. */
464 mtx_lock(&mountlist_mtx);
466 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
468 if (mp->mnt_exjail != NULL &&
469 mp->mnt_exjail->cr_prison == pr) {
471 error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
474 * If the vfs_busy() fails, we still want to
475 * get rid of mnt_exjail for two reasons:
476 * - a credential reference will result in
477 * a prison not being removed
478 * - setting mnt_exjail NULL indicates that
479 * the exports are no longer valid
480 * The now invalid exports will be deleted
481 * when the file system is dismounted or
482 * the file system is re-exported by mountd.
486 if (mp->mnt_exjail != NULL &&
487 mp->mnt_exjail->cr_prison == pr) {
489 mp->mnt_exjail = NULL;
/* Re-check under the exclusive export lock before tearing the exports down. */
501 lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
503 if (mp->mnt_exjail != NULL &&
504 mp->mnt_exjail->cr_prison == pr) {
506 mp->mnt_exjail = NULL;
507 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
509 vfs_free_addrlist(mp->mnt_export);
510 free(mp->mnt_export, M_MOUNT);
511 mp->mnt_export = NULL;
514 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
/*
 * NOTE(review): mountlist_mtx is taken again here; presumably it was
 * released on the successful vfs_busy() path -- confirm.
 */
519 mtx_lock(&mountlist_mtx);
527 mtx_unlock(&mountlist_mtx);
531 * Set the publicly exported filesystem (WebNFS). Currently, only
532 * one public filesystem is possible in the spec (RFC 2054 and 2055)
/*
 * vfs_setpublicfs: set or clear the single public file system recorded in
 * the global nfs_pub.
 *
 * mp   - mount to publish; vfs_export() passes NULL to invalidate the
 *        current registration.
 * nep  - export information of mp (not referenced by the visible lines).
 * argp - export arguments; ex_indexfile, when non-NULL, is a user-space
 *        string copied in with copyinstr().
 *
 * vfs_export() treats a nonzero return value as an error.
 *
 * NOTE(review): the return type, local declarations, braces, the mp ==
 * NULL test, error checks and returns are not visible in this view --
 * confirm against the full source.
 */
535 vfs_setpublicfs(struct mount *mp, struct netexport *nep,
536 struct export_args *argp)
543 * mp == NULL -> invalidate the current info, the FS is
544 * no longer exported. May be called from either vfs_export
545 * or unmount, so check if it hasn't already been done.
548 if (nfs_pub.np_valid) {
549 nfs_pub.np_valid = 0;
550 if (nfs_pub.np_index != NULL) {
551 free(nfs_pub.np_index, M_TEMP);
552 nfs_pub.np_index = NULL;
559 * Only one allowed at a time.
561 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
565 * Get real filehandle for root of exported FS.
/* Start from a zeroed handle so no stale bytes survive in it. */
567 bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
568 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
570 if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp)))
573 if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
579 * If an indexfile was specified, pull it in.
581 if (argp->ex_indexfile != NULL) {
/* The buffer is MAXNAMLEN + 1 bytes and is reused if already allocated. */
582 if (nfs_pub.np_index == NULL)
583 nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP,
585 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
586 MAXNAMLEN, (size_t *)0);
589 * Check for illegal filenames.
591 for (cp = nfs_pub.np_index; *cp; cp++) {
/* The index name buffer is released and the pointer reset here. */
599 free(nfs_pub.np_index, M_TEMP);
600 nfs_pub.np_index = NULL;
/* Publish the new registration. */
605 nfs_pub.np_mount = mp;
606 nfs_pub.np_valid = 1;
611 * Used by the filesystems to determine if a given network address
612 * (passed in 'nam') is present in their exports list, returns a pointer
613 * to struct netcred so that the filesystem can examine it for
614 * access rights (read/write/etc).
/*
 * vfs_export_lookup: find the export entry that applies to address nam on
 * mount mp.
 *
 * The address is matched in a per-family radix head under its read lock.
 * When nothing matched and the mount has MNT_DEFEXPORTED set, the default
 * entry embedded in the netexport is returned.
 *
 * NOTE(review): braces, the assignment of saddr, the case labels that
 * select rnh, and the remaining return statements are not visible in this
 * view -- confirm against the full source.
 */
616 static struct netcred *
617 vfs_export_lookup(struct mount *mp, struct sockaddr *nam)
619 RADIX_NODE_HEAD_RLOCK_TRACKER;
620 struct netexport *nep;
621 struct netcred *np = NULL;
622 struct radix_node_head *rnh;
623 struct sockaddr *saddr;
625 nep = mp->mnt_export;
/* Mounts without MNT_EXPORTED are handled separately from the lookup below. */
628 if ((mp->mnt_flag & MNT_EXPORTED) == 0)
632 * Lookup in the export list
637 switch (saddr->sa_family) {
646 RADIX_NODE_HEAD_RLOCK(rnh);
647 np = (struct netcred *) (*rnh->rnh_matchaddr)(saddr, &rnh->rh);
648 RADIX_NODE_HEAD_RUNLOCK(rnh);
/*
 * A match whose node carries RNF_ROOT is singled out here.
 * NOTE(review): presumably such a match is discarded -- confirm.
 */
649 if (np != NULL && (np->netc_rnodes->rn_flags & RNF_ROOT) != 0)
655 * If no address match, use the default if it exists.
657 if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED) != 0)
658 return (&nep->ne_defexported);
664 * XXX: This comment comes from the deprecated ufs_check_export()
665 * XXX: and may not entirely apply, but lacking something better:
666 * This is the generic part of fhtovp called after the underlying
667 * filesystem has validated the file handle.
669 * Verify that a host should have access to a filesystem.
/*
 * vfs_stdcheckexp: look up the export entry for address nam on mount mp
 * and hand its settings back to the caller.
 *
 * extflagsp     - receives the export flags of the entry.
 * credanonp     - receives the anonymous credential pointer of the entry.
 * numsecflavors - receives the number of security flavors.
 * secflavors    - when non-NULL, receives that many flavors.
 *
 * The lookup and the copies are done with mp->mnt_explock held shared.
 *
 * NOTE(review): the return type, the declaration of np, the test of the
 * lookup result, what is done with a non-NULL credential, the tail of the
 * memcpy() call and the return statements are not visible in this view;
 * the function may also continue past the last visible line.
 */
673 vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp,
674 struct ucred **credanonp, int *numsecflavors, int *secflavors)
678 lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
679 np = vfs_export_lookup(mp, nam);
/* NOTE(review): presumably the release taken when the lookup found nothing -- confirm. */
681 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
685 *extflagsp = np->netc_exflags;
686 if ((*credanonp = np->netc_anon) != NULL)
689 *numsecflavors = np->netc_numsecflavors;
/* Same bounds that vfs_hang_addrlist() asserts when the entry is created. */
690 KASSERT(*numsecflavors > 0,
691 ("%s: numsecflavors <= 0", __func__));
692 KASSERT(*numsecflavors < MAXSECFLAVORS,
693 ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
/* The flavor list is optional for the caller. */
695 if (secflavors && np->netc_numsecflavors > 0)
696 memcpy(secflavors, np->netc_secflavors, np->netc_numsecflavors *
698 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);