2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
39 #include <sys/cdefs.h>
41 #include "opt_inet6.h"
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/dirent.h>
47 #include <sys/kernel.h>
49 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
54 #include <sys/rmlock.h>
55 #include <sys/refcount.h>
56 #include <sys/signalvar.h>
57 #include <sys/socket.h>
58 #include <sys/vnode.h>
60 #include <netinet/in.h>
61 #include <net/radix.h>
63 #include <rpc/types.h>
/* Malloc type for the per-host export records allocated in this file. */
66 static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
/*
 * Forward declarations for the file-local helpers defined further down.
 * The radix head constructor is declared under the INET / INET6 condition.
 */
68 #if defined(INET) || defined(INET6)
69 static struct radix_node_head *vfs_create_addrlist_af(
70 struct radix_node_head **prnh, int off);
72 static int vfs_free_netcred(struct radix_node *rn, void *w);
73 static void vfs_free_addrlist_af(struct radix_node_head **prnh);
74 static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
75 struct export_args *argp);
76 static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *);
79 * Network address lookup element
/*
 * Members of the per-address export record (struct netcred).
 * netc_rnodes has to stay the first member: the code in this file casts
 * between struct radix_node pointers and struct netcred pointers.
 */
82 struct radix_node netc_rnodes[2];
/* Export flags, copied from ex_flags of the export request. */
83 uint64_t netc_exflags;
/* Credential obtained from crget() and filled from ex_uid and the group data of the request. */
84 struct ucred *netc_anon;
/* Number of entries of netc_secflavors that are in use. */
85 int netc_numsecflavors;
/* Security flavors, copied from ex_secflavors of the export request. */
86 int netc_secflavors[MAXSECFLAVORS];
90 * Network export information
/* Members of the per-mount export information (struct netexport). */
93 struct netcred ne_defexported; /* Default export */
/* Radix tree keyed on sin_addr of struct sockaddr_in; created on demand by vfs_hang_addrlist(). */
94 struct radix_node_head *ne4;
/* Radix tree keyed on sin6_addr of struct sockaddr_in6; created on demand by vfs_hang_addrlist(). */
95 struct radix_node_head *ne6;
99 * Build hash lists of net addresses and hang them off the mount point.
100 * Called by vfs_export() to set up the lists of export addresses.
103 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
104 struct export_args *argp)
107 struct radix_node_head *rnh;
109 struct radix_node *rn;
110 struct sockaddr *saddr, *smask = NULL;
111 #if defined(INET6) || defined(INET)
116 KASSERT(argp->ex_numsecflavors > 0,
117 ("%s: numsecflavors <= 0", __func__));
118 KASSERT(argp->ex_numsecflavors < MAXSECFLAVORS,
119 ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
122 * XXX: This routine converts from a uid plus gid list
123 * to a `struct ucred' (np->netc_anon). This
124 * operation is questionable; for example, what should be done
125 * with fields like cr_uidinfo and cr_prison? Currently, this
126 * routine does not touch them (leaves them as NULL).
128 if (argp->ex_addrlen == 0) {
129 if (mp->mnt_flag & MNT_DEFEXPORTED) {
131 "MNT_DEFEXPORTED already set for mount %p", mp);
134 np = &nep->ne_defexported;
135 np->netc_exflags = argp->ex_flags;
136 np->netc_anon = crget();
137 np->netc_anon->cr_uid = argp->ex_uid;
138 crsetgroups(np->netc_anon, argp->ex_ngroups,
140 np->netc_anon->cr_prison = &prison0;
141 prison_hold(np->netc_anon->cr_prison);
142 np->netc_numsecflavors = argp->ex_numsecflavors;
143 bcopy(argp->ex_secflavors, np->netc_secflavors,
144 sizeof(np->netc_secflavors));
146 mp->mnt_flag |= MNT_DEFEXPORTED;
152 if (argp->ex_addrlen > MLEN) {
153 vfs_mount_error(mp, "ex_addrlen %d is greater than %d",
154 argp->ex_addrlen, MLEN);
159 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
160 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
161 saddr = (struct sockaddr *) (np + 1);
162 if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen)))
164 if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) {
166 vfs_mount_error(mp, "Invalid saddr->sa_family: %d");
169 if (saddr->sa_len > argp->ex_addrlen)
170 saddr->sa_len = argp->ex_addrlen;
171 if (argp->ex_masklen) {
172 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
173 error = copyin(argp->ex_mask, smask, argp->ex_masklen);
176 if (smask->sa_len > argp->ex_masklen)
177 smask->sa_len = argp->ex_masklen;
180 switch (saddr->sa_family) {
183 if ((rnh = nep->ne4) == NULL) {
184 off = offsetof(struct sockaddr_in, sin_addr) << 3;
185 rnh = vfs_create_addrlist_af(&nep->ne4, off);
191 if ((rnh = nep->ne6) == NULL) {
192 off = offsetof(struct sockaddr_in6, sin6_addr) << 3;
193 rnh = vfs_create_addrlist_af(&nep->ne6, off);
200 vfs_mount_error(mp, "%s %s %d",
201 "Unable to initialize radix node head ",
202 "for address family", saddr->sa_family);
205 RADIX_NODE_HEAD_LOCK(rnh);
206 rn = (*rnh->rnh_addaddr)(saddr, smask, &rnh->rh, np->netc_rnodes);
207 RADIX_NODE_HEAD_UNLOCK(rnh);
208 if (rn == NULL || np != (struct netcred *)rn) { /* already exists */
211 "netcred already exists for given addr/mask");
214 np->netc_exflags = argp->ex_flags;
215 np->netc_anon = crget();
216 np->netc_anon->cr_uid = argp->ex_uid;
217 crsetgroups(np->netc_anon, argp->ex_ngroups,
219 np->netc_anon->cr_prison = &prison0;
220 prison_hold(np->netc_anon->cr_prison);
221 np->netc_numsecflavors = argp->ex_numsecflavors;
222 bcopy(argp->ex_secflavors, np->netc_secflavors,
223 sizeof(np->netc_secflavors));
230 /* Helper for vfs_free_addrlist. */
/*
 * vfs_free_netcred: radix tree walker callback, passed to rnh_walktree
 * by vfs_free_addrlist_af() when an address list is torn down.
 *
 * rn - node being visited; it is the first member of a struct netcred
 * w  - the radix_node_head that owns rn
 */
233 vfs_free_netcred(struct radix_node *rn, void *w)
235 struct radix_node_head *rnh = (struct radix_node_head *) w;
/* Unlink the entry from the tree using the key and mask stored in the node. */
238 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, &rnh->rh);
/*
 * Pick up the anonymous credential attached to the enclosing netcred.
 * NOTE(review): how the credential and the record are released is on
 * lines not visible here - confirm.
 */
239 cred = ((struct netcred *)rn)->netc_anon;
246 #if defined(INET) || defined(INET6)
/*
 * vfs_create_addrlist_af: create the radix tree head for one address family.
 *
 * prnh - where the new head is stored (ne4 or ne6 of a struct netexport)
 * off  - bit offset of the address within the sockaddr, handed to rn_inithead()
 *
 * Callers in this file treat a NULL result as failure.
 */
247 static struct radix_node_head *
248 vfs_create_addrlist_af(struct radix_node_head **prnh, int off)
/* A zero result from rn_inithead() takes the branch below (presumably the failure path - confirm). */
251 if (rn_inithead((void **)prnh, off) == 0)
/* The head exists at this point; set up its lock. */
253 RADIX_NODE_HEAD_LOCK_INIT(*prnh);
/*
 * vfs_free_addrlist_af: tear down the radix tree of one address family.
 *
 * prnh - address of the head pointer (ne4 or ne6 of a struct netexport)
 */
259 vfs_free_addrlist_af(struct radix_node_head **prnh)
261 struct radix_node_head *rnh;
/* Visit every node with the tree locked; vfs_free_netcred() handles each entry. */
264 RADIX_NODE_HEAD_LOCK(rnh);
265 (*rnh->rnh_walktree)(&rnh->rh, vfs_free_netcred, rnh);
266 RADIX_NODE_HEAD_UNLOCK(rnh);
/* Undo the head setup: RADIX_NODE_HEAD_DESTROY, then rn_detachhead() on the caller pointer. */
267 RADIX_NODE_HEAD_DESTROY(rnh);
268 rn_detachhead((void **)prnh);
273 * Free the net address hash lists that are hanging off the mount points.
/*
 * vfs_free_addrlist: release everything hanging off a struct netexport.
 *
 * nep - export information whose address trees and default export
 *       credential are released; the structure itself is not freed here
 */
276 vfs_free_addrlist(struct netexport *nep)
/* Drop the per-family address trees that were actually created. */
280 if (nep->ne4 != NULL)
281 vfs_free_addrlist_af(&nep->ne4);
282 if (nep->ne6 != NULL)
283 vfs_free_addrlist_af(&nep->ne6);
/*
 * Detach the credential of the default export entry.
 * NOTE(review): the release of the credential is on lines not visible
 * here - confirm.
 */
285 cred = nep->ne_defexported.netc_anon;
288 nep->ne_defexported.netc_anon = NULL;
294 * High level function to manipulate export options on a mount point
295 * and the passed in netexport.
296 * Struct export_args *argp is the variable used to twiddle options,
297 * the structure is described in sys/mount.h
298 * The do_exjail argument should be true if *mp is in the mountlist
299 * and false if not. It is not in the mountlist for the NFSv4 rootfs
300 * fake mount point just used for exports.
/*
 * vfs_export: apply the export request in *argp to mount point mp.
 *
 * mp        - mount whose export state (mnt_export, mnt_exjail, mnt_flag)
 *             is updated
 * argp      - export request; ex_flags selects MNT_DELEXPORT and/or
 *             MNT_EXPORTED, optionally together with MNT_EXPUBLIC
 * do_exjail - when set and the mount has no mnt_exjail yet, the credential
 *             of the calling thread is recorded there and the export count
 *             of its prison is incremented
 *
 * All export state changes happen with mnt_explock held exclusively.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably 0 or the errno value collected in error - confirm.
 */
303 vfs_export(struct mount *mp, struct export_args *argp, int do_exjail)
305 struct netexport *nep;
/* A request that neither deletes nor adds exports is singled out here. */
311 if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0)
/* So is an export request whose flavor count lies outside 0 .. MAXSECFLAVORS - 1. */
314 if ((argp->ex_flags & MNT_EXPORTED) != 0 &&
315 (argp->ex_numsecflavors < 0
316 || argp->ex_numsecflavors >= MAXSECFLAVORS))
/* pr is the prison of the calling thread. */
320 pr = curthread->td_ucred->cr_prison;
321 lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
322 nep = mp->mnt_export;
/* Part 1: remove the existing exports. */
323 if (argp->ex_flags & MNT_DELEXPORT) {
/* Exports that were set up from a different prison than the caller are singled out. */
329 if (mp->mnt_exjail != NULL && mp->mnt_exjail->cr_prison != pr &&
332 /* EXDEV will not get logged by mountd(8). */
335 } else if (mp->mnt_exjail != NULL &&
336 mp->mnt_exjail->cr_prison != pr) {
338 /* EPERM will get logged by mountd(8). */
/* Withdraw the public (WebNFS) export if this mount provided it. */
343 if (mp->mnt_flag & MNT_EXPUBLIC) {
344 vfs_setpublicfs(NULL, NULL, NULL);
346 mp->mnt_flag &= ~MNT_EXPUBLIC;
/* Release the address lists and detach the export information from the mount. */
349 vfs_free_addrlist(nep);
350 mp->mnt_export = NULL;
/* Forget the owning credential and clear the export flags. */
355 mp->mnt_exjail = NULL;
356 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
/* One exported mount fewer is counted against the prison. */
359 atomic_subtract_int(&pr->pr_exportcnt, 1);
/* Part 2: add the requested export. */
363 if (argp->ex_flags & MNT_EXPORTED) {
/* No owning credential recorded: leftover export data is discarded and zeroed. */
366 if (mp->mnt_exjail == NULL) {
368 if (do_exjail && nep != NULL) {
369 vfs_free_addrlist(nep);
370 memset(nep, 0, sizeof(*nep));
/* The exports are owned by a different prison than the caller. */
373 } else if (mp->mnt_exjail->cr_prison != pr) {
/* Allocate the export information and attach it to the mount. */
380 nep = malloc(sizeof(struct netexport), M_MOUNT,
382 mp->mnt_export = nep;
/* Public (WebNFS) export requested. */
385 if (argp->ex_flags & MNT_EXPUBLIC) {
386 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) {
/* Failure: the export information is detached again (the guarding condition is not visible here). */
388 mp->mnt_export = NULL;
/* Record the credential of the exporting thread and count the export for its prison. */
395 if (do_exjail && mp->mnt_exjail == NULL) {
396 mp->mnt_exjail = crhold(curthread->td_ucred);
397 atomic_add_int(&pr->pr_exportcnt, 1);
399 mp->mnt_flag |= MNT_EXPUBLIC;
/* A request without security flavors defaults to AUTH_SYS only. */
402 if (argp->ex_numsecflavors == 0) {
403 argp->ex_numsecflavors = 1;
404 argp->ex_secflavors[0] = AUTH_SYS;
/* Install the address (or default) entry described by the request. */
406 if ((error = vfs_hang_addrlist(mp, nep, argp))) {
/* Failure: the export information is detached again (the guarding condition is not visible here). */
408 mp->mnt_export = NULL;
/* Same bookkeeping as for the public export above, then mark the mount exported. */
414 if (do_exjail && mp->mnt_exjail == NULL) {
415 mp->mnt_exjail = crhold(curthread->td_ucred);
416 atomic_add_int(&pr->pr_exportcnt, 1);
418 mp->mnt_flag |= MNT_EXPORTED;
/* Common exit: drop the export lock before touching the option lists. */
423 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
425 * Once we have executed the vfs_export() command, we do
426 * not want to keep the "export" option around in the
427 * options list, since that will cause subsequent MNT_UPDATE
428 * calls to fail. The export information is saved in
429 * mp->mnt_export, so we can safely delete the "export" mount option
432 vfs_deleteopt(mp->mnt_optnew, "export");
433 vfs_deleteopt(mp->mnt_opt, "export");
438 * Get rid of credential references for this prison.
/*
 * vfs_exjail_delete: drop the export credential references (mnt_exjail)
 * that mounts on the mountlist hold for prison pr, together with the
 * exports that belong to them when the mount can be busied.
 *
 * pr - prison whose exports are being cleaned up
 */
441 vfs_exjail_delete(struct prison *pr)
448 * Since this function is called from prison_cleanup() after
449 * all processes in the prison have exited, the value of
450 * pr_exportcnt can no longer increase. It is possible for
451 * a dismount of a file system exported within this prison
452 * to be in progress. In this case, the file system is no
453 * longer in the mountlist and the mnt_exjail will be free'd
454 * by vfs_mount_destroy() at some time. As such, pr_exportcnt
455 * and, therefore "i", is the upper bound on the number of
456 * mnt_exjail entries to be found by this function.
/* Snapshot of how many exported mounts are charged to this prison. */
458 i = atomic_load_int(&pr->pr_exportcnt);
459 KASSERT(i >= 0, ("vfs_exjail_delete: pr_exportcnt negative"));
/* Walk the mount list looking for mounts whose exports belong to pr. */
462 mtx_lock(&mountlist_mtx);
464 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
466 if (mp->mnt_exjail != NULL &&
467 mp->mnt_exjail->cr_prison == pr) {
/*
 * Try to busy the mount without sleeping (MBF_NOWAIT).
 * NOTE(review): mountlist_mtx is taken again further down, so
 * vfs_busy() presumably drops it on success - confirm against
 * vfs_busy(9).
 */
469 error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
472 * If the vfs_busy() fails, we still want to
473 * get rid of mnt_exjail for two reasons:
474 * - a credential reference will result in
475 * a prison not being removed
476 * - setting mnt_exjail NULL indicates that
477 * the exports are no longer valid
478 * The now invalid exports will be deleted
479 * when the file system is dismounted or
480 * the file system is re-exported by mountd.
/* Re-check the ownership and only detach the credential from the mount. */
484 if (mp->mnt_exjail != NULL &&
485 mp->mnt_exjail->cr_prison == pr) {
487 mp->mnt_exjail = NULL;
/*
 * Presumably the path taken when vfs_busy() succeeded: with the export
 * lock held exclusively, re-check the ownership, then clear the export
 * flags and free the export information of the mount.
 */
499 lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
501 if (mp->mnt_exjail != NULL &&
502 mp->mnt_exjail->cr_prison == pr) {
504 mp->mnt_exjail = NULL;
505 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
507 vfs_free_addrlist(mp->mnt_export);
508 free(mp->mnt_export, M_MOUNT);
509 mp->mnt_export = NULL;
512 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
/* Take the mount list lock again before the walk goes on. */
517 mtx_lock(&mountlist_mtx);
/* Done with the mount list. */
525 mtx_unlock(&mountlist_mtx);
529 * Set the publicly exported filesystem (WebNFS). Currently, only
530 * one public filesystem is possible in the spec (RFC 2054 and 2055)
533 vfs_setpublicfs(struct mount *mp, struct netexport *nep,
534 struct export_args *argp)
541 * mp == NULL -> invalidate the current info, the FS is
542 * no longer exported. May be called from either vfs_export
543 * or unmount, so check if it hasn't already been done.
546 if (nfs_pub.np_valid) {
547 nfs_pub.np_valid = 0;
548 if (nfs_pub.np_index != NULL) {
549 free(nfs_pub.np_index, M_TEMP);
550 nfs_pub.np_index = NULL;
557 * Only one allowed at a time.
559 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
563 * Get real filehandle for root of exported FS.
565 bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
566 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
568 if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp)))
571 if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
577 * If an indexfile was specified, pull it in.
579 if (argp->ex_indexfile != NULL) {
580 if (nfs_pub.np_index == NULL)
581 nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP,
583 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
584 MAXNAMLEN, (size_t *)0);
587 * Check for illegal filenames.
589 for (cp = nfs_pub.np_index; *cp; cp++) {
597 free(nfs_pub.np_index, M_TEMP);
598 nfs_pub.np_index = NULL;
603 nfs_pub.np_mount = mp;
604 nfs_pub.np_valid = 1;
609 * Used by the filesystems to determine if a given network address
610 * (passed in 'nam') is present in their exports list, returns a pointer
611 * to struct netcred so that the filesystem can examine it for
612 * access rights (read/write/etc).
/*
 * vfs_export_lookup: find the export entry that applies to a client address.
 *
 * mp  - mount point whose export information (mnt_export) is searched
 * nam - client address to look up
 *
 * Returns the default export entry of the mount when no address entry
 * matched and MNT_DEFEXPORTED is set.
 * NOTE(review): the other return paths are not visible in this listing;
 * presumably the matching entry or NULL - confirm.
 */
614 static struct netcred *
615 vfs_export_lookup(struct mount *mp, struct sockaddr *nam)
/* Declares the tracker that the RLOCK / RUNLOCK macros below rely on. */
617 RADIX_NODE_HEAD_RLOCK_TRACKER;
618 struct netexport *nep;
619 struct netcred *np = NULL;
620 struct radix_node_head *rnh;
621 struct sockaddr *saddr;
623 nep = mp->mnt_export;
/* A mount without MNT_EXPORTED is singled out here. */
626 if ((mp->mnt_flag & MNT_EXPORTED) == 0)
630 * Lookup in the export list
/* The address family presumably selects which tree (ne4 or ne6) is searched - confirm. */
635 switch (saddr->sa_family) {
/* Match the address against the tree while holding its read lock. */
644 RADIX_NODE_HEAD_RLOCK(rnh);
645 np = (struct netcred *) (*rnh->rnh_matchaddr)(saddr, &rnh->rh);
646 RADIX_NODE_HEAD_RUNLOCK(rnh);
/*
 * A match on a node flagged RNF_ROOT is treated specially.
 * NOTE(review): the statement that follows is not visible here;
 * presumably such a node does not count as a real export entry - confirm.
 */
647 if (np != NULL && (np->netc_rnodes->rn_flags & RNF_ROOT) != 0)
653 * If no address match, use the default if it exists.
655 if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED) != 0)
656 return (&nep->ne_defexported);
662 * XXX: This comment comes from the deprecated ufs_check_export()
663 * XXX: and may not entirely apply, but lacking something better:
664 * This is the generic part of fhtovp called after the underlying
665 * filesystem has validated the file handle.
667 * Verify that a host should have access to a filesystem.
/*
 * vfs_stdcheckexp: look up the export entry for a client address and copy
 * its settings out to the caller.
 *
 * mp            - mount point being accessed
 * nam           - client address, handed to vfs_export_lookup()
 * extflagsp     - receives the export flags of the entry
 * credanonp     - receives the anonymous credential of the entry (may be NULL)
 * numsecflavors - receives the number of security flavors of the entry
 * secflavors    - when non-NULL, receives the security flavors of the entry
 *
 * The export lock of the mount is held shared around the lookup and the
 * copies.
 * NOTE(review): the return statements are not visible in this listing.
 */
671 vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp,
672 struct ucred **credanonp, int *numsecflavors, int *secflavors)
676 lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
677 np = vfs_export_lookup(mp, nam);
/* Early release of the export lock; presumably the path taken when the lookup found nothing - confirm. */
679 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
683 *extflagsp = np->netc_exflags;
/*
 * Hand out the anonymous credential of the entry.
 * NOTE(review): a non-NULL credential gets extra handling on a line not
 * visible here; presumably a reference is taken for the caller - confirm.
 */
684 if ((*credanonp = np->netc_anon) != NULL)
/* Report the flavor count; it is expected to lie strictly between 0 and MAXSECFLAVORS. */
687 *numsecflavors = np->netc_numsecflavors;
688 KASSERT(*numsecflavors > 0,
689 ("%s: numsecflavors <= 0", __func__));
690 KASSERT(*numsecflavors < MAXSECFLAVORS,
691 ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
/* Copy out the flavor list, sized by the number of flavors in use. */
693 if (secflavors && np->netc_numsecflavors > 0)
694 memcpy(secflavors, np->netc_secflavors, np->netc_numsecflavors *
696 lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);