2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
38 #include "opt_inet6.h"
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/kernel.h>
44 #include <sys/sysctl.h>
46 #include <sys/filedesc.h>
47 #include <sys/vnode.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/domain.h>
58 #include <sys/protosw.h>
59 #include <sys/namei.h>
60 #include <sys/fcntl.h>
61 #include <sys/lockf.h>
63 #include <netinet/in.h>
64 #include <netinet/tcp.h>
67 #include <netinet6/in6_var.h>
69 #include <nfs/xdr_subs.h>
70 #include <nfs/rpcv2.h>
71 #include <nfs/nfsproto.h>
72 #include <nfsserver/nfs.h>
73 #include <nfsserver/nfsm_subs.h>
74 #include <nfsserver/nfsrvcache.h>
/*
 * malloc(9) type tags used by the allocations in this file:
 *   M_NFSSVC    - struct nfssvc_sock (per-socket server state),
 *   M_NFSRVDESC - request descriptors and queued receive records,
 *   M_NFSD      - struct nfsd (per-daemon state).
 */
76 static MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure");
78 MALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor");
79 MALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure");
84 SYSCTL_DECL(_vfs_nfsrv);
/*
 * Decremented when an nfsd leaves nfssvc_nfsd(); when it reaches zero
 * nfsrv_init(TRUE) is called.
 * NOTE(review): the matching increment is not visible in this listing.
 */
87 int nfsrv_numnfsd = 0;
/*
 * Tested in nfssvc_nfsd(): a new non-NFSv3 WRITE is only handed to
 * nfsrv_writegather() when this is zero.
 */
88 static int notstarted = 1;
/*
 * Exported read-write through the sysctl below.
 * NOTE(review): the code that consults this flag is not visible in this
 * listing; presumably it gates the privileged-port check in nfssvc_nfsd()
 * - confirm against the full file.
 */
90 static int nfs_privport = 0;
/*
 * Read-write integer sysctls under _vfs_nfsrv: nfs_privport, plus
 * nfsrvw_procrastinate and nfsrvw_procrastinate_v3 published under the
 * names gatherdelay and gatherdelay_v3.  All three have empty descriptions.
 */
91 SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
92 &nfs_privport, 0, "");
93 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
94 &nfsrvw_procrastinate, 0, "");
95 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
96 &nfsrvw_procrastinate_v3, 0, "");
/*
 * Forward declarations of file-local helpers.
 * NOTE(review): the continuation line of the nfssvc_addsock prototype is
 * missing from this listing.
 */
98 static int nfssvc_addsock(struct file *, struct sockaddr *,
100 static void nfsrv_zapsock(struct nfssvc_sock *slp);
101 static int nfssvc_nfsd(struct thread *);
104 * NFS server system calls
108 * Nfs server pseudo system call for the nfsd's
109 * Based on the flag value it either:
110 * - adds a socket to the selection list
111 * - remains in the kernel as an nfsd
112 * - remains in the kernel as an nfsiod
113 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
114 * and that mountd provides
115 * - sockaddr with no IPv4-mapped addresses
116 * - mask for both INET and INET6 families if there is IPv4-mapped overlap
118 #ifndef _SYS_SYSPROTO_H_
/*
 * nfssvc: entry point of the NFS server pseudo system call.
 *
 * td  - calling thread; passed to priv_check(), fget() and the helpers.
 * uap - syscall arguments; uap->flag selects the operation and uap->argp
 *       is the user pointer copied into a struct nfsd_args.
 *
 * Visible flow: check the PRIV_NFS_DAEMON privilege, wait while SLP_INIT is
 * set on nfssvc_sockhead_flag, then either register a socket
 * (NFSSVC_ADDSOCK) via nfssvc_addsock() or become a server daemon
 * (NFSSVC_NFSD) via nfssvc_nfsd().
 *
 * NOTE(review): this listing omits some original lines (braces, local
 * declarations, error exits, the return statement); the comments here
 * describe only the statements that are shown.
 */
125 nfssvc(struct thread *td, struct nfssvc_args *uap)
128 struct sockaddr *nam;
129 struct nfsd_args nfsdarg;
/* Must not be entered with Giant held. */
132 KASSERT(!mtx_owned(&Giant), ("nfssvc(): called with Giant"));
134 error = priv_check(td, PRIV_NFS_DAEMON);
/*
 * Sleep on nfssvc_sockhead, flagging SLP_WANTINIT, for as long as SLP_INIT
 * is set.  nfsrv_init() clears SLP_INIT and issues the wakeup when
 * SLP_WANTINIT is set.
 */
139 while (nfssvc_sockhead_flag & SLP_INIT) {
140 nfssvc_sockhead_flag |= SLP_WANTINIT;
141 (void) msleep(&nfssvc_sockhead, &nfsd_mtx, PSOCK,
145 if (uap->flag & NFSSVC_ADDSOCK) {
/* Fetch the user-supplied nfsd_args describing the socket to add. */
146 error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
/* Resolve the descriptor nfsdarg.sock; it must refer to a socket. */
149 if ((error = fget(td, nfsdarg.sock, &fp)) != 0)
151 if (fp->f_type != DTYPE_SOCKET) {
156 * Get the client address for connected sockets.
158 if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
161 error = getsockaddr(&nam, nfsdarg.name,
168 error = nfssvc_addsock(fp, nam, td);
170 } else if (uap->flag & NFSSVC_NFSD) {
171 error = nfssvc_nfsd(td);
/* NOTE(review): the statement guarded by this test is not shown here. */
175 if (error == EINTR || error == ERESTART)
183 * Adds a socket to the list for servicing by nfsds.
/*
 * nfssvc_addsock: register a socket for servicing by the nfsds.
 *
 * fp    - file backing the socket.  NOTE(review): the line deriving the
 *         socket pointer (so) from it is not visible in this listing.
 * mynam - peer address; freed with M_SONAME on the two failure paths that
 *         are visible below.
 * td    - calling thread; not referenced on any line shown here.
 *
 * Visible steps, in order:
 *   - for an IPPROTO_UDP socket, test whether an existing entry (tslp) is
 *     already SLP_VALID and free mynam in that case;
 *   - reserve send and receive buffer space with soreserve(); for
 *     SOCK_STREAM the size is NFS_MAXPACKET + sizeof (u_long);
 *   - prepare SO_KEEPALIVE (stream sockets) and TCP_NODELAY (TCP) option
 *     requests;
 *   - clear SB_NOINTR and the timeout on both socket buffers;
 *   - allocate a struct nfssvc_sock, queue it on nfssvc_sockhead, install
 *     nfsrv_rcv as the receive upcall and mark it SLP_VALID | SLP_NEEDQ.
 *
 * NOTE(review): error returns, the assignments to val and tslp, and the
 * calls that apply the socket options are on lines missing from this
 * listing.
 */
186 nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
189 struct nfssvc_sock *slp;
198 * XXXRW: If this code is ever enabled, there's a race when running
203 * Add it to the list, as required.
205 if (so->so_proto->pr_protocol == IPPROTO_UDP) {
207 if (tslp->ns_flag & SLP_VALID) {
209 FREE(mynam, M_SONAME);
214 if (so->so_type == SOCK_STREAM)
215 siz = NFS_MAXPACKET + sizeof (u_long);
218 error = soreserve(so, siz, siz);
221 FREE(mynam, M_SONAME);
226 * Set protocol specific options { for now TCP only } and
227 * reserve some space. For datagram sockets, this can get called
228 * repeatedly for the same socket, but that isn't harmful.
/* Build a SOL_SOCKET / SO_KEEPALIVE set request for stream sockets. */
230 if (so->so_type == SOCK_STREAM) {
234 bzero(&sopt, sizeof sopt);
235 sopt.sopt_dir = SOPT_SET;
236 sopt.sopt_level = SOL_SOCKET;
237 sopt.sopt_name = SO_KEEPALIVE;
238 sopt.sopt_val = &val;
239 sopt.sopt_valsize = sizeof val;
/* Build an IPPROTO_TCP / TCP_NODELAY set request for TCP sockets. */
243 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
247 bzero(&sopt, sizeof sopt);
248 sopt.sopt_dir = SOPT_SET;
249 sopt.sopt_level = IPPROTO_TCP;
250 sopt.sopt_name = TCP_NODELAY;
251 sopt.sopt_val = &val;
252 sopt.sopt_valsize = sizeof val;
/*
 * Under each socket buffer lock, clear SB_NOINTR and set the buffer
 * timeout to zero, first for the receive side and then for the send side.
 */
256 SOCKBUF_LOCK(&so->so_rcv);
257 so->so_rcv.sb_flags &= ~SB_NOINTR;
258 so->so_rcv.sb_timeo = 0;
259 SOCKBUF_UNLOCK(&so->so_rcv);
260 SOCKBUF_LOCK(&so->so_snd);
261 so->so_snd.sb_flags &= ~SB_NOINTR;
262 so->so_snd.sb_timeo = 0;
263 SOCKBUF_UNLOCK(&so->so_snd);
/*
 * Allocate the per-socket server state, initialise its receive-record
 * queue and append it to the global socket list.
 * NOTE(review): the malloc flags and any condition selecting this path
 * (for example reuse of tslp) are on lines missing from this listing.
 */
265 slp = (struct nfssvc_sock *)
266 malloc(sizeof (struct nfssvc_sock), M_NFSSVC,
268 STAILQ_INIT(&slp->ns_rec);
270 TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
277 * XXXRW: Socket locking here?
/*
 * Route receive upcalls for this socket to nfsrv_rcv() with slp as the
 * argument, enable SB_UPCALL on the receive buffer, then publish the
 * entry as valid and needing an initial queue scan (SLP_NEEDQ).
 */
280 so->so_upcallarg = (caddr_t)slp;
281 so->so_upcall = nfsrv_rcv;
282 SOCKBUF_LOCK(&so->so_rcv);
283 so->so_rcv.sb_flags |= SB_UPCALL;
284 SOCKBUF_UNLOCK(&so->so_rcv);
285 slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
293 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
294 * until it is killed by a signal.
/*
 * nfssvc_nfsd: body of a kernel NFS server daemon.
 *
 * td - calling thread.  NOTE(review): the line storing it (the code below
 *      reads nfsd->nfsd_td) is not visible in this listing.
 *
 * Visible structure:
 *   1. Allocate a zeroed struct nfsd and append it to nfsd_head.
 *   2. When no request is in progress, sleep until a socket is assigned to
 *      this daemon or NFSD_CHECKSLP is raised, then look for a socket that
 *      is SLP_VALID with SLP_DOREC set and pull a request from it with
 *      nfsrv_dorec().
 *   3. For a usable request: timestamp it, pick the peer address, consult
 *      the reply cache, reject requests from unprivileged ports, dispatch
 *      through nfsrv3_procs[] or nfsrv_writegather(), and send the reply
 *      (with a record mark on stream sockets).
 *   4. Repeat while gathered writes are due, then fetch the next request.
 *   5. On exit remove the daemon from nfsd_head, free it, and reinitialise
 *      the server when the last daemon leaves.
 *
 * NOTE(review): this listing omits many original lines (braces, the outer
 * loop header, switch labels, lock handling, error exits); the comments
 * describe only the statements that are shown.
 */
297 nfssvc_nfsd(struct thread *td)
300 struct nfssvc_sock *slp;
302 struct nfsrv_descript *nd = NULL;
303 struct mbuf *m, *mreq;
304 int error = 0, cacherep, s, sotype, writes_todo;
/* Per-daemon state, zero-filled and queued on the global daemon list. */
314 nfsd = (struct nfsd *)
315 malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK | M_ZERO);
320 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
324 * Loop getting rpc requests until SIGKILL.
/*
 * No request in progress: mark this daemon NFSD_WAITING and sleep
 * (interruptibly, PCATCH) until a socket is bound to it or NFSD_CHECKSLP
 * is set.
 */
327 if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
328 while (nfsd->nfsd_slp == NULL &&
329 (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
330 nfsd->nfsd_flag |= NFSD_WAITING;
332 error = msleep(nfsd, &nfsd_mtx,
333 PSOCK | PCATCH, "-", 0);
/*
 * Still unbound but NFSD_CHECKSLP is set: scan the socket list for a valid
 * socket with SLP_DOREC set, clear that flag and bind the socket to this
 * daemon.
 */
338 if (nfsd->nfsd_slp == NULL &&
339 (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
340 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
341 if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
342 == (SLP_VALID | SLP_DOREC)) {
343 slp->ns_flag &= ~SLP_DOREC;
345 nfsd->nfsd_slp = slp;
350 nfsd_head_flag &= ~NFSD_CHECKSLP;
352 if ((slp = nfsd->nfsd_slp) == NULL)
354 if (slp->ns_flag & SLP_VALID) {
355 if (slp->ns_flag & SLP_DISCONN)
357 else if (slp->ns_flag & SLP_NEEDQ) {
358 slp->ns_flag &= ~SLP_NEEDQ;
359 (void) nfs_slplock(slp, 1);
361 nfsrv_rcv(slp->ns_so, (caddr_t)slp,
366 error = nfsrv_dorec(slp, nfsd, &nd);
367 cur_usec = nfs_curusec();
368 if (error && LIST_FIRST(&slp->ns_tq) &&
369 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) {
375 nfsd->nfsd_flag |= NFSD_REQINPROG;
379 slp = nfsd->nfsd_slp;
/*
 * Receive failed or the socket is no longer valid: release the request
 * descriptor and unbind this daemon from the socket.
 */
381 if (error || (slp->ns_flag & SLP_VALID) == 0) {
383 if (nd->nd_cr != NULL)
385 free((caddr_t)nd, M_NFSRVDESC);
388 nfsd->nfsd_slp = NULL;
389 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
394 sotype = slp->ns_so->so_type;
396 getmicrotime(&nd->nd_starttime);
398 nd->nd_nam = nd->nd_nam2;
400 nd->nd_nam = slp->ns_nam;
403 * Check to see if authorization is needed.
405 cacherep = nfsrv_getcache(nd, &mreq);
408 /* Check if source port is privileged */
410 struct sockaddr *nam = nd->nd_nam;
411 struct sockaddr_in *sin;
413 sin = (struct sockaddr_in *)nam;
415 * INET/INET6 - same code:
416 * sin_port and sin6_port are at same offset
418 port = ntohs(sin->sin_port);
/*
 * A request other than NFSPROC_NULL arriving from a port at or above
 * IPPORT_RESERVED is turned into NFSPROC_NOOP with reply status
 * NFSERR_AUTHERR | AUTH_TOOWEAK, and the peer address and port are logged.
 */
419 if (port >= IPPORT_RESERVED &&
420 nd->nd_procnum != NFSPROC_NULL) {
422 char b6[INET6_ADDRSTRLEN];
423 #if defined(KLD_MODULE)
424 /* Do not use ip6_sprintf: the nfs module should work without INET6. */
425 #define ip6_sprintf(buf, a) \
426 (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \
427 (a)->s6_addr16[0], (a)->s6_addr16[1], \
428 (a)->s6_addr16[2], (a)->s6_addr16[3], \
429 (a)->s6_addr16[4], (a)->s6_addr16[5], \
430 (a)->s6_addr16[6], (a)->s6_addr16[7]), \
434 nd->nd_procnum = NFSPROC_NOOP;
435 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
437 printf("NFS request from unprivileged port (%s:%d)\n",
439 sin->sin_family == AF_INET6 ?
440 ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
441 #if defined(KLD_MODULE)
445 inet_ntoa(sin->sin_addr), port);
452 * Loop to get all the write rpc relies that have been
458 if (nd && (nd->nd_flag & ND_NFSV3))
459 procrastinate = nfsrvw_procrastinate_v3;
461 procrastinate = nfsrvw_procrastinate;
/*
 * Write gathering is used when gathered writes are already due
 * (writes_todo), or for a new non-NFSv3 WRITE when the gather delay is
 * positive and notstarted is zero; everything else is dispatched through
 * the nfsrv3_procs[] table indexed by procedure number.
 */
463 if (writes_todo || (!(nd->nd_flag & ND_NFSV3) &&
464 nd->nd_procnum == NFSPROC_WRITE &&
465 procrastinate > 0 && !notstarted))
466 error = nfsrv_writegather(&nd, slp,
467 nfsd->nfsd_td, &mreq);
469 error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
470 slp, nfsd->nfsd_td, &mreq);
474 if (error != 0 && error != NFSERR_RETVOID) {
475 nfsrvstats.srv_errs++;
476 nfsrv_updatecache(nd, FALSE, mreq);
478 FREE(nd->nd_nam2, M_SONAME);
481 nfsrvstats.srvrpccnt[nd->nd_procnum]++;
482 nfsrv_updatecache(nd, TRUE, mreq);
487 siz = m_length(mreq, NULL);
/* A reply that is empty or larger than NFS_MAXPACKET is treated as fatal. */
488 if (siz <= 0 || siz > NFS_MAXPACKET) {
489 printf("mbuf siz=%d\n",siz);
490 panic("Bad nfs svc reply");
493 m->m_pkthdr.len = siz;
494 m->m_pkthdr.rcvif = NULL;
496 * For stream protocols, prepend a Sun RPC
/*
 * Stream reply: prepend NFSX_UNSIGNED bytes and store, in network byte
 * order, the reply length with the top bit (0x80000000) set.
 * NOTE(review): the top bit is presumably the RPC record-marking
 * last-fragment flag (RFC 5531) - confirm.
 */
499 if (sotype == SOCK_STREAM) {
500 M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT);
501 *mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
504 if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
505 (void) nfs_slplock(slp, 1);
506 if (slp->ns_flag & SLP_VALID) {
508 error = nfsrv_send(slp->ns_so, nd->nd_nam2, m);
515 FREE(nd->nd_nam2, M_SONAME);
517 m_freem(nd->nd_mrep);
520 if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
522 if (error == EINTR || error == ERESTART) {
523 if (nd->nd_cr != NULL)
525 free((caddr_t)nd, M_NFSRVDESC);
532 m_freem(nd->nd_mrep);
534 FREE(nd->nd_nam2, M_SONAME);
538 if (nd->nd_cr != NULL)
540 FREE((caddr_t)nd, M_NFSRVDESC);
545 * Check to see if there are outstanding writes that
546 * need to be serviced.
548 cur_usec = nfs_curusec();
550 if (LIST_FIRST(&slp->ns_tq) &&
551 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) {
557 } while (writes_todo);
/*
 * Try to pull the next request from the same socket; on a nonzero return
 * from nfsrv_dorec() clear NFSD_REQINPROG and drop the socket binding.
 */
559 if (nfsrv_dorec(slp, nfsd, &nd)) {
560 nfsd->nfsd_flag &= ~NFSD_REQINPROG;
561 nfsd->nfsd_slp = NULL;
564 KASSERT(!(debug_mpsafenet == 0 && !mtx_owned(&Giant)),
565 ("nfssvc_nfsd(): debug.mpsafenet=0 && !Giant"));
566 KASSERT(!(debug_mpsafenet == 1 && mtx_owned(&Giant)),
567 ("nfssvc_nfsd(): debug.mpsafenet=1 && Giant"));
570 KASSERT(!(debug_mpsafenet == 0 && !mtx_owned(&Giant)),
571 ("nfssvc_nfsd(): debug.mpsafenet=0 && !Giant"));
572 KASSERT(!(debug_mpsafenet == 1 && mtx_owned(&Giant)),
573 ("nfssvc_nfsd(): debug.mpsafenet=1 && Giant"));
/*
 * Daemon teardown: unlink and free the nfsd structure; when the daemon
 * count drops to zero the server state is reinitialised.
 */
574 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
576 free((caddr_t)nfsd, M_NFSD);
577 if (--nfsrv_numnfsd == 0)
578 nfsrv_init(TRUE); /* Reinitialize everything */
584 * Shut down a socket associated with an nfssvc_sock structure.
585 * Should be called with the send lock set, if required.
586 * The trick here is to increment the sref at the start, so that the nfsds
587 * will stop using it and clear ns_flag at the end so that it will not be
588 * reassigned during cleanup.
/*
 * nfsrv_zapsock: tear down the socket attached to an nfssvc_sock and
 * release everything queued on it.
 *
 * slp - server socket entry to shut down.
 *
 * Visible steps: clear every SLP_ALLFLAGS bit in ns_flag, disable the
 * receive upcall and shut the socket down in both directions, then free
 * the stored peer address, the raw receive chain, every queued record and
 * every descriptor on the write-gather list, leaving ns_tq reinitialised.
 *
 * NOTE(review): the lines that obtain so, drop and retake nfsd_mtx, close
 * the file and release credentials are not visible in this listing.
 */
591 nfsrv_zapsock(struct nfssvc_sock *slp)
593 struct nfsrv_descript *nwp, *nnwp;
596 struct nfsrv_rec *rec;
603 * XXXRW: By clearing all flags, other threads/etc should ignore
604 * this slp and we can safely release nfsd_mtx so we can clean
607 slp->ns_flag &= ~SLP_ALLFLAGS;
/*
 * Stop upcalls: clear SB_UPCALL under the receive buffer lock, then reset
 * the upcall function and argument before shutting down both directions.
 */
613 SOCKBUF_LOCK(&so->so_rcv);
614 so->so_rcv.sb_flags &= ~SB_UPCALL;
615 SOCKBUF_UNLOCK(&so->so_rcv);
616 so->so_upcall = NULL;
617 so->so_upcallarg = NULL;
618 soshutdown(so, SHUT_RDWR);
622 FREE(slp->ns_nam, M_SONAME);
623 m_freem(slp->ns_raw);
/* Drain the queue of received records, freeing address, packet and node. */
624 while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) {
625 STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
627 FREE(rec->nr_address, M_SONAME);
628 m_freem(rec->nr_packet);
629 free(rec, M_NFSRVDESC);
/*
 * Drain the write-gather list; the successor is fetched before the
 * current entry is unlinked and freed.
 */
632 for (nwp = LIST_FIRST(&slp->ns_tq); nwp; nwp = nnwp) {
633 nnwp = LIST_NEXT(nwp, nd_tq);
634 LIST_REMOVE(nwp, nd_tq);
635 if (nwp->nd_cr != NULL)
637 free((caddr_t)nwp, M_NFSRVDESC);
639 LIST_INIT(&slp->ns_tq);
645 * Dereference a server socket structure. If it has no more references and
646 * is no longer valid, you can throw it away.
/*
 * nfsrv_slpderef: drop one reference on a server socket entry.
 *
 * slp - entry whose ns_sref count is decremented.
 *
 * When the count reaches zero and SLP_VALID is no longer set, the entry is
 * unlinked from nfssvc_sockhead and freed; otherwise it is left alone.
 * NOTE(review): any lock assertion at the top of the function is on a line
 * missing from this listing.
 */
649 nfsrv_slpderef(struct nfssvc_sock *slp)
654 if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
655 TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
656 free((caddr_t)slp, M_NFSSVC);
661 * Lock a socket against others.
663 * XXXRW: Wait argument is always 1 in the caller. Replace with a real
/*
 * nfs_slplock: take the NFSRV_SNDLOCK bit in slp->ns_solock.
 *
 * slp  - server socket whose ns_solock word holds the lock state.
 * wait - when zero and the lock is already held, return 0 immediately.
 *
 * Otherwise the caller sets NFSRV_WANTSND and sleeps on the state word
 * (msleep with nfsd_mtx) for as long as the lock bit is set, and then sets
 * the bit itself.
 * NOTE(review): the value returned on success is not visible in this
 * listing.
 */
667 nfs_slplock(struct nfssvc_sock *slp, int wait)
669 int *statep = &slp->ns_solock;
673 if (!wait && (*statep & NFSRV_SNDLOCK))
674 return(0); /* already locked, fail */
675 while (*statep & NFSRV_SNDLOCK) {
676 *statep |= NFSRV_WANTSND;
677 (void) msleep(statep, &nfsd_mtx, PZERO - 1, "nfsslplck", 0);
679 *statep |= NFSRV_SNDLOCK;
684 * Unlock the stream socket for others.
/*
 * nfs_slpunlock: release the NFSRV_SNDLOCK bit in slp->ns_solock.
 *
 * slp - server socket whose ns_solock word holds the lock state.
 *
 * Panics with "nfs slpunlock" when the lock bit is not set.  After
 * clearing the lock bit, NFSRV_WANTSND is cleared too when it was set.
 * NOTE(review): the wakeup of sleepers is presumably on a line missing
 * from this listing - confirm against the full file.
 */
687 nfs_slpunlock(struct nfssvc_sock *slp)
689 int *statep = &slp->ns_solock;
693 if ((*statep & NFSRV_SNDLOCK) == 0)
694 panic("nfs slpunlock");
695 *statep &= ~NFSRV_SNDLOCK;
696 if (*statep & NFSRV_WANTSND) {
697 *statep &= ~NFSRV_WANTSND;
703 * Initialize the data structures for the server.
704 * Handshake with any new nfsds starting up to avoid any chance of
/*
 * nfsrv_init: (re)initialise the server socket list and the daemon list.
 *
 * terminating - NOTE(review): not referenced on any line shown here; the
 *               daemon exit path in nfssvc_nfsd() passes TRUE.  How it
 *               gates the steps below must be confirmed against the full
 *               file.
 *
 * Visible steps: set SLP_INIT on nfssvc_sockhead_flag, unlink and free the
 * entries on nfssvc_sockhead, clean the server reply cache, invalidate
 * nfs_pub, reinitialise the socket list, clear SLP_INIT and wake anyone
 * who set SLP_WANTINIT, reinitialise the daemon list, and finally allocate
 * and queue two zero-filled entries, nfs_udpsock and nfs_cltpsock.
 */
708 nfsrv_init(int terminating)
710 struct nfssvc_sock *slp, *nslp;
/* NOTE(review): the action taken when SLP_INIT is already set is not shown. */
715 if (nfssvc_sockhead_flag & SLP_INIT)
717 nfssvc_sockhead_flag |= SLP_INIT;
/*
 * The _SAFE iterator is needed because each entry is unlinked and freed
 * inside the loop.
 * NOTE(review): the statement run for SLP_VALID entries is not shown.
 */
719 TAILQ_FOREACH_SAFE(slp, &nfssvc_sockhead, ns_chain, nslp) {
720 if (slp->ns_flag & SLP_VALID)
722 TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
723 free((caddr_t)slp, M_NFSSVC);
725 nfsrv_cleancache(); /* And clear out server cache */
727 nfs_pub.np_valid = 0;
729 TAILQ_INIT(&nfssvc_sockhead);
/* Initialisation done: release callers sleeping in nfssvc() on SLP_INIT. */
730 nfssvc_sockhead_flag &= ~SLP_INIT;
731 if (nfssvc_sockhead_flag & SLP_WANTINIT) {
732 nfssvc_sockhead_flag &= ~SLP_WANTINIT;
733 wakeup(&nfssvc_sockhead);
736 TAILQ_INIT(&nfsd_head);
737 nfsd_head_flag &= ~NFSD_CHECKSLP;
/*
 * Pre-allocate two zeroed socket entries: nfs_udpsock goes at the head of
 * the list and nfs_cltpsock at the tail.
 */
740 nfs_udpsock = (struct nfssvc_sock *)
741 malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
742 STAILQ_INIT(&nfs_udpsock->ns_rec);
743 TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
745 nfs_cltpsock = (struct nfssvc_sock *)
746 malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
747 STAILQ_INIT(&nfs_cltpsock->ns_rec);
748 TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);