]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/nfsserver/nfs_syscalls.c
This commit was generated by cvs2svn to compensate for changes in r165182,
[FreeBSD/FreeBSD.git] / sys / nfsserver / nfs_syscalls.c
1 /*-
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *      @(#)nfs_syscalls.c      8.5 (Berkeley) 3/30/95
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "opt_inet6.h"
39 #include "opt_mac.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysproto.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/vnode.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/priv.h>
52 #include <sys/proc.h>
53 #include <sys/bio.h>
54 #include <sys/buf.h>
55 #include <sys/mbuf.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/domain.h>
59 #include <sys/protosw.h>
60 #include <sys/namei.h>
61 #include <sys/fcntl.h>
62 #include <sys/lockf.h>
63
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66 #ifdef INET6
67 #include <net/if.h>
68 #include <netinet6/in6_var.h>
69 #endif
70 #include <nfs/xdr_subs.h>
71 #include <nfs/rpcv2.h>
72 #include <nfs/nfsproto.h>
73 #include <nfsserver/nfs.h>
74 #include <nfsserver/nfsm_subs.h>
75 #include <nfsserver/nfsrvcache.h>
76
77 #include <security/mac/mac_framework.h>
78
/* Malloc type used in this file for struct nfssvc_sock allocations. */
static MALLOC_DEFINE(M_NFSSVC, "nfsserver_srvsock", "Nfs server structure");

/*
 * Malloc types used in this file for request descriptors/queued records
 * (M_NFSRVDESC) and for the per-daemon struct nfsd (M_NFSD).
 */
MALLOC_DEFINE(M_NFSRVDESC, "nfsserver_srvdesc", "NFS server socket descriptor");
MALLOC_DEFINE(M_NFSD, "nfsserver_daemon", "Nfs server daemon structure");


#define TRUE    1
#define FALSE   0

SYSCTL_DECL(_vfs_nfsrv);

/* Number of nfsds currently asleep in nfssvc_nfsd() waiting for work. */
int             nfsd_waiting = 0;
/*
 * Number of threads currently inside nfssvc_nfsd(); when it drops to zero
 * the server state is reinitialized via nfsrv_init(TRUE).
 */
int             nfsrv_numnfsd = 0;
/*
 * While nonzero, nfssvc_nfsd() does not route NFSv2 writes through
 * nfsrv_writegather().  It is never cleared in this file.
 */
static int      notstarted = 1;

/*
 * When nonzero, requests (other than the NULL procedure) arriving from a
 * source port >= IPPORT_RESERVED are rejected with AUTH_TOOWEAK.
 */
static int      nfs_privport = 0;
SYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
            &nfs_privport, 0, "");
/* Write-gathering delay tunables for NFSv2 and NFSv3 respectively. */
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
            &nfsrvw_procrastinate, 0, "");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
            &nfsrvw_procrastinate_v3, 0, "");

/* Forward declarations of the file-local helpers defined below. */
static int      nfssvc_addsock(struct file *, struct sockaddr *,
                    struct thread *);
static void     nfsrv_zapsock(struct nfssvc_sock *slp);
static int      nfssvc_nfsd(struct thread *);
106
107 /*
108  * NFS server system calls
109  */
110
111 /*
112  * Nfs server psuedo system call for the nfsd's
113  * Based on the flag value it either:
114  * - adds a socket to the selection list
115  * - remains in the kernel as an nfsd
116  * - remains in the kernel as an nfsiod
117  * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
118  * and that mountd provides
119  *  - sockaddr with no IPv4-mapped addresses
120  *  - mask for both INET and INET6 families if there is IPv4-mapped overlap
121  */
122 #ifndef _SYS_SYSPROTO_H_
123 struct nfssvc_args {
124         int flag;
125         caddr_t argp;
126 };
127 #endif
128 /*
129  * MPSAFE
130  */
131 int
132 nfssvc(struct thread *td, struct nfssvc_args *uap)
133 {
134         struct file *fp;
135         struct sockaddr *nam;
136         struct nfsd_args nfsdarg;
137         int error;
138
139         KASSERT(!mtx_owned(&Giant), ("nfssvc(): called with Giant"));
140
141 #ifdef MAC
142         error = mac_check_system_nfsd(td->td_ucred);
143         if (error)
144                 return (error);
145 #endif
146         error = priv_check(td, PRIV_NFSD);
147         if (error)
148                 return (error);
149         NET_LOCK_GIANT();
150         NFSD_LOCK();
151         while (nfssvc_sockhead_flag & SLP_INIT) {
152                  nfssvc_sockhead_flag |= SLP_WANTINIT;
153                 (void) msleep(&nfssvc_sockhead, &nfsd_mtx, PSOCK,
154                     "nfsd init", 0);
155         }
156         NFSD_UNLOCK();
157         if (uap->flag & NFSSVC_ADDSOCK) {
158                 error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
159                 if (error)
160                         goto done2;
161                 if ((error = fget(td, nfsdarg.sock, &fp)) != 0)
162                         goto done2;
163                 if (fp->f_type != DTYPE_SOCKET) {
164                         fdrop(fp, td);
165                         goto done2;
166                 }
167                 /*
168                  * Get the client address for connected sockets.
169                  */
170                 if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
171                         nam = NULL;
172                 else {
173                         error = getsockaddr(&nam, nfsdarg.name,
174                                             nfsdarg.namelen);
175                         if (error) {
176                                 fdrop(fp, td);
177                                 goto done2;
178                         }
179                 }
180                 error = nfssvc_addsock(fp, nam, td);
181                 fdrop(fp, td);
182         } else if (uap->flag & NFSSVC_NFSD) {
183                 error = nfssvc_nfsd(td);
184         } else {
185                 error = ENXIO;
186         }
187         if (error == EINTR || error == ERESTART)
188                 error = 0;
189 done2:
190         NET_UNLOCK_GIANT();
191         return (error);
192 }
193
194 /*
195  * Adds a socket to the list for servicing by nfsds.
196  */
197 static int
198 nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
199 {
200         int siz;
201         struct nfssvc_sock *slp;
202         struct socket *so;
203         int error, s;
204
205         NET_ASSERT_GIANT();
206
207         so = fp->f_data;
208 #if 0
209         /*
210          * XXXRW: If this code is ever enabled, there's a race when running
211          * MPSAFE.
212          */
213         tslp = NULL;
214         /*
215          * Add it to the list, as required.
216          */
217         if (so->so_proto->pr_protocol == IPPROTO_UDP) {
218                 tslp = nfs_udpsock;
219                 if (tslp->ns_flag & SLP_VALID) {
220                         if (mynam != NULL)
221                                 FREE(mynam, M_SONAME);
222                         return (EPERM);
223                 }
224         }
225 #endif
226         if (so->so_type == SOCK_STREAM)
227                 siz = NFS_MAXPACKET + sizeof (u_long);
228         else
229                 siz = NFS_MAXPACKET;
230         error = soreserve(so, siz, siz);
231         if (error) {
232                 if (mynam != NULL)
233                         FREE(mynam, M_SONAME);
234                 return (error);
235         }
236
237         /*
238          * Set protocol specific options { for now TCP only } and
239          * reserve some space. For datagram sockets, this can get called
240          * repeatedly for the same socket, but that isn't harmful.
241          */
242         if (so->so_type == SOCK_STREAM) {
243                 struct sockopt sopt;
244                 int val;
245
246                 bzero(&sopt, sizeof sopt);
247                 sopt.sopt_dir = SOPT_SET;
248                 sopt.sopt_level = SOL_SOCKET;
249                 sopt.sopt_name = SO_KEEPALIVE;
250                 sopt.sopt_val = &val;
251                 sopt.sopt_valsize = sizeof val;
252                 val = 1;
253                 sosetopt(so, &sopt);
254         }
255         if (so->so_proto->pr_protocol == IPPROTO_TCP) {
256                 struct sockopt sopt;
257                 int val;
258
259                 bzero(&sopt, sizeof sopt);
260                 sopt.sopt_dir = SOPT_SET;
261                 sopt.sopt_level = IPPROTO_TCP;
262                 sopt.sopt_name = TCP_NODELAY;
263                 sopt.sopt_val = &val;
264                 sopt.sopt_valsize = sizeof val;
265                 val = 1;
266                 sosetopt(so, &sopt);
267         }
268         SOCKBUF_LOCK(&so->so_rcv);
269         so->so_rcv.sb_flags &= ~SB_NOINTR;
270         so->so_rcv.sb_timeo = 0;
271         SOCKBUF_UNLOCK(&so->so_rcv);
272         SOCKBUF_LOCK(&so->so_snd);
273         so->so_snd.sb_flags &= ~SB_NOINTR;
274         so->so_snd.sb_timeo = 0;
275         SOCKBUF_UNLOCK(&so->so_snd);
276
277         slp = (struct nfssvc_sock *)
278                 malloc(sizeof (struct nfssvc_sock), M_NFSSVC,
279                 M_WAITOK | M_ZERO);
280         STAILQ_INIT(&slp->ns_rec);
281         NFSD_LOCK();
282         TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
283
284         slp->ns_so = so;
285         slp->ns_nam = mynam;
286         fhold(fp);
287         slp->ns_fp = fp;
288         /*
289          * XXXRW: Socket locking here?
290          */
291         s = splnet();
292         so->so_upcallarg = (caddr_t)slp;
293         so->so_upcall = nfsrv_rcv;
294         SOCKBUF_LOCK(&so->so_rcv);
295         so->so_rcv.sb_flags |= SB_UPCALL;
296         SOCKBUF_UNLOCK(&so->so_rcv);
297         slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
298         nfsrv_wakenfsd(slp);
299         splx(s);
300         NFSD_UNLOCK();
301         return (0);
302 }
303
/*
 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
 * until it is killed by a signal.
 *
 * More precisely, the loop ends when the interruptible msleep() used while
 * idle returns an error, or when nfsrv_send() fails with EINTR/ERESTART.
 * On the way out the nfsd is unlinked from nfsd_head and freed, and the
 * last nfsd to leave calls nfsrv_init(TRUE).  The error that ended the
 * loop is returned.
 *
 * nfsd_mtx is taken once before the loop and released just before
 * returning; inside the loop it is dropped only around nfsrv_rcv(), the
 * reply mbuf preparation and nfsrv_send().
 */
static int
nfssvc_nfsd(struct thread *td)
{
        int siz;
        struct nfssvc_sock *slp;
        struct nfsd *nfsd;
        struct nfsrv_descript *nd = NULL;
        struct mbuf *m, *mreq;
        int error = 0, cacherep, s, sotype, writes_todo;
        int procrastinate;
        u_quad_t cur_usec;

        NET_ASSERT_GIANT();

#ifndef nolint
        cacherep = RC_DOIT;
        writes_todo = 0;
#endif
        nfsd = (struct nfsd *)
                malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK | M_ZERO);
        s = splnet();
        NFSD_LOCK();

        /* Register this daemon on the global nfsd list. */
        nfsd->nfsd_td = td;
        TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
        nfsrv_numnfsd++;

        /*
         * Loop getting rpc requests until SIGKILL.
         */
        for (;;) {
                if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
                        /*
                         * Idle: sleep until a socket is assigned to us or
                         * NFSD_CHECKSLP asks for a scan of the socket list.
                         */
                        while (nfsd->nfsd_slp == NULL &&
                            (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
                                nfsd->nfsd_flag |= NFSD_WAITING;
                                nfsd_waiting++;
                                error = msleep(nfsd, &nfsd_mtx,
                                    PSOCK | PCATCH, "-", 0);
                                nfsd_waiting--;
                                if (error)
                                        goto done;
                        }
                        /*
                         * Nothing assigned: claim the first valid socket
                         * flagged SLP_DOREC, taking a reference on it.  If
                         * none is found, clear NFSD_CHECKSLP.
                         */
                        if (nfsd->nfsd_slp == NULL &&
                            (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
                                TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
                                    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
                                        == (SLP_VALID | SLP_DOREC)) {
                                            slp->ns_flag &= ~SLP_DOREC;
                                            slp->ns_sref++;
                                            nfsd->nfsd_slp = slp;
                                            break;
                                    }
                                }
                                if (slp == NULL)
                                        nfsd_head_flag &= ~NFSD_CHECKSLP;
                        }
                        if ((slp = nfsd->nfsd_slp) == NULL)
                                continue;
                        if (slp->ns_flag & SLP_VALID) {
                                if (slp->ns_flag & SLP_DISCONN)
                                        nfsrv_zapsock(slp);
                                else if (slp->ns_flag & SLP_NEEDQ) {
                                        /*
                                         * Pull pending data off the socket
                                         * with nfsd_mtx dropped, holding
                                         * the per-socket lock meanwhile.
                                         */
                                        slp->ns_flag &= ~SLP_NEEDQ;
                                        (void) nfs_slplock(slp, 1);
                                        NFSD_UNLOCK();
                                        nfsrv_rcv(slp->ns_so, (caddr_t)slp,
                                                M_TRYWAIT);
                                        NFSD_LOCK();
                                        nfs_slpunlock(slp);
                                }
                                error = nfsrv_dorec(slp, nfsd, &nd);
                                cur_usec = nfs_curusec();
                                /*
                                 * No request obtained, but the head of the
                                 * write-gather queue is due: carry on and
                                 * service that instead.
                                 */
                                if (error && LIST_FIRST(&slp->ns_tq) &&
                                    LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) {
                                        error = 0;
                                        cacherep = RC_DOIT;
                                        writes_todo = 1;
                                } else
                                        writes_todo = 0;
                                nfsd->nfsd_flag |= NFSD_REQINPROG;
                        }
                } else {
                        /* A request was already fetched at the loop bottom. */
                        error = 0;
                        slp = nfsd->nfsd_slp;
                }
                /*
                 * Nothing to do on this socket, or it is no longer valid:
                 * discard any descriptor, release our reference and go
                 * back to waiting.
                 */
                if (error || (slp->ns_flag & SLP_VALID) == 0) {
                        if (nd) {
                                if (nd->nd_cr != NULL)
                                        crfree(nd->nd_cr);
                                free((caddr_t)nd, M_NFSRVDESC);
                                nd = NULL;
                        }
                        nfsd->nfsd_slp = NULL;
                        nfsd->nfsd_flag &= ~NFSD_REQINPROG;
                        nfsrv_slpderef(slp);
                        continue;
                }
                splx(s);
                sotype = slp->ns_so->so_type;
                if (nd) {
                    getmicrotime(&nd->nd_starttime);
                    /* Prefer the per-request address over the socket's. */
                    if (nd->nd_nam2)
                        nd->nd_nam = nd->nd_nam2;
                    else
                        nd->nd_nam = slp->ns_nam;

                    /*
                     * Consult the request cache; the result (RC_DOIT,
                     * RC_REPLY or RC_DROPIT) selects the case taken in
                     * the switch below.
                     */
                    cacherep = nfsrv_getcache(nd, &mreq);

                    if (nfs_privport) {
                        /* Check if source port is privileged */
                        u_short port;
                        struct sockaddr *nam = nd->nd_nam;
                        struct sockaddr_in *sin;

                        sin = (struct sockaddr_in *)nam;
                        /*
                         * INET/INET6 - same code:
                         *    sin_port and sin6_port are at same offset
                         */
                        port = ntohs(sin->sin_port);
                        if (port >= IPPORT_RESERVED &&
                            nd->nd_procnum != NFSPROC_NULL) {
#ifdef INET6
                            char b6[INET6_ADDRSTRLEN];
#if defined(KLD_MODULE)
        /* Do not use ip6_sprintf: the nfs module should work without INET6. */
#define ip6_sprintf(buf, a) \
         (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \
                  (a)->s6_addr16[0], (a)->s6_addr16[1], \
                  (a)->s6_addr16[2], (a)->s6_addr16[3], \
                  (a)->s6_addr16[4], (a)->s6_addr16[5], \
                  (a)->s6_addr16[6], (a)->s6_addr16[7]), \
         (buf))
#endif
#endif
                            /*
                             * Turn the request into a NOOP carrying an
                             * AUTH_TOOWEAK status and force RC_DOIT so it
                             * is dispatched below, then log the sender.
                             */
                            nd->nd_procnum = NFSPROC_NOOP;
                            nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
                            cacherep = RC_DOIT;
                            printf("NFS request from unprivileged port (%s:%d)\n",
#ifdef INET6
                                sin->sin_family == AF_INET6 ?
                                    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
#if defined(KLD_MODULE)
#undef ip6_sprintf
#endif
#endif
                                    inet_ntoa(sin->sin_addr), port);
                        }
                    }

                }

                /*
                 * Loop to get all the write rpc replies that have been
                 * gathered together.
                 */
                do {
                    switch (cacherep) {
                    case RC_DOIT:
                        if (nd && (nd->nd_flag & ND_NFSV3))
                            procrastinate = nfsrvw_procrastinate_v3;
                        else
                            procrastinate = nfsrvw_procrastinate;
                        /*
                         * Pending gathered writes, or an NFSv2 write with
                         * gathering enabled, go through the write-gather
                         * path; everything else is dispatched through the
                         * procedure table.
                         */
                        if (writes_todo || (!(nd->nd_flag & ND_NFSV3) &&
                            nd->nd_procnum == NFSPROC_WRITE &&
                            procrastinate > 0 && !notstarted))
                            error = nfsrv_writegather(&nd, slp,
                                nfsd->nfsd_td, &mreq);
                        else
                            error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
                                slp, nfsd->nfsd_td, &mreq);
                        /* No reply mbuf was produced: nothing to send. */
                        if (mreq == NULL)
                                break;
                        if (error != 0 && error != NFSERR_RETVOID) {
                                nfsrvstats.srv_errs++;
                                nfsrv_updatecache(nd, FALSE, mreq);
                                if (nd->nd_nam2)
                                        FREE(nd->nd_nam2, M_SONAME);
                                break;
                        }
                        nfsrvstats.srvrpccnt[nd->nd_procnum]++;
                        nfsrv_updatecache(nd, TRUE, mreq);
                        nd->nd_mrep = NULL;
                        /* FALLTHROUGH */
                    case RC_REPLY:
                        /* Build the reply with nfsd_mtx dropped. */
                        NFSD_UNLOCK();
                        siz = m_length(mreq, NULL);
                        if (siz <= 0 || siz > NFS_MAXPACKET) {
                                printf("mbuf siz=%d\n",siz);
                                panic("Bad nfs svc reply");
                        }
                        m = mreq;
                        m->m_pkthdr.len = siz;
                        m->m_pkthdr.rcvif = NULL;
                        /*
                         * For stream protocols, prepend a Sun RPC
                         * Record Mark.
                         */
                        if (sotype == SOCK_STREAM) {
                                M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT);
                                *mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
                        }
                        NFSD_LOCK();
                        /*
                         * Connection-oriented sockets are sent on under
                         * the per-socket lock.
                         */
                        if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
                                (void) nfs_slplock(slp, 1);
                        if (slp->ns_flag & SLP_VALID) {
                            NFSD_UNLOCK();
                            error = nfsrv_send(slp->ns_so, nd->nd_nam2, m);
                            NFSD_LOCK();
                        } else {
                            /* Socket went away: drop the reply. */
                            error = EPIPE;
                            m_freem(m);
                        }
                        if (nd->nd_nam2)
                                FREE(nd->nd_nam2, M_SONAME);
                        if (nd->nd_mrep)
                                m_freem(nd->nd_mrep);
                        if (error == EPIPE)
                                nfsrv_zapsock(slp);
                        if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
                                nfs_slpunlock(slp);
                        /*
                         * Interrupted send: release the descriptor and our
                         * socket reference, then leave the daemon loop.
                         */
                        if (error == EINTR || error == ERESTART) {
                                if (nd->nd_cr != NULL)
                                        crfree(nd->nd_cr);
                                free((caddr_t)nd, M_NFSRVDESC);
                                nfsrv_slpderef(slp);
                                s = splnet();
                                goto done;
                        }
                        break;
                    case RC_DROPIT:
                        /* Discard the request without replying. */
                        m_freem(nd->nd_mrep);
                        if (nd->nd_nam2)
                                FREE(nd->nd_nam2, M_SONAME);
                        break;
                    };
                    /* Done with this request descriptor, if any. */
                    if (nd) {
                        if (nd->nd_cr != NULL)
                                crfree(nd->nd_cr);
                        FREE((caddr_t)nd, M_NFSRVDESC);
                        nd = NULL;
                    }

                    /*
                     * Check to see if there are outstanding writes that
                     * need to be serviced.
                     */
                    cur_usec = nfs_curusec();
                    s = splsoftclock();
                    if (LIST_FIRST(&slp->ns_tq) &&
                        LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) {
                        cacherep = RC_DOIT;
                        writes_todo = 1;
                    } else
                        writes_todo = 0;
                    splx(s);
                } while (writes_todo);
                s = splnet();
                /*
                 * Try to fetch the next request from the same socket.  On
                 * failure give the socket up; on success NFSD_REQINPROG
                 * stays set and the next pass goes straight to processing.
                 */
                if (nfsrv_dorec(slp, nfsd, &nd)) {
                        nfsd->nfsd_flag &= ~NFSD_REQINPROG;
                        nfsd->nfsd_slp = NULL;
                        nfsrv_slpderef(slp);
                }
                KASSERT(!(debug_mpsafenet == 0 && !mtx_owned(&Giant)),
                    ("nfssvc_nfsd(): debug.mpsafenet=0 && !Giant"));
                KASSERT(!(debug_mpsafenet == 1 && mtx_owned(&Giant)),
                    ("nfssvc_nfsd(): debug.mpsafenet=1 && Giant"));
        }
done:
        KASSERT(!(debug_mpsafenet == 0 && !mtx_owned(&Giant)),
            ("nfssvc_nfsd(): debug.mpsafenet=0 && !Giant"));
        KASSERT(!(debug_mpsafenet == 1 && mtx_owned(&Giant)),
            ("nfssvc_nfsd(): debug.mpsafenet=1 && Giant"));
        /* Unregister and free this daemon's state. */
        TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
        splx(s);
        free((caddr_t)nfsd, M_NFSD);
        if (--nfsrv_numnfsd == 0)
                nfsrv_init(TRUE);       /* Reinitialize everything */
        NFSD_UNLOCK();
        return (error);
}
592
593 /*
594  * Shut down a socket associated with an nfssvc_sock structure.
595  * Should be called with the send lock set, if required.
596  * The trick here is to increment the sref at the start, so that the nfsds
597  * will stop using it and clear ns_flag at the end so that it will not be
598  * reassigned during cleanup.
599  */
600 static void
601 nfsrv_zapsock(struct nfssvc_sock *slp)
602 {
603         struct nfsrv_descript *nwp, *nnwp;
604         struct socket *so;
605         struct file *fp;
606         struct nfsrv_rec *rec;
607         int s;
608
609         NET_ASSERT_GIANT();
610         NFSD_LOCK_ASSERT();
611
612         /*
613          * XXXRW: By clearing all flags, other threads/etc should ignore
614          * this slp and we can safely release nfsd_mtx so we can clean
615          * up the slp safely.
616          */
617         slp->ns_flag &= ~SLP_ALLFLAGS;
618         fp = slp->ns_fp;
619         if (fp) {
620                 NFSD_UNLOCK();
621                 slp->ns_fp = NULL;
622                 so = slp->ns_so;
623                 SOCKBUF_LOCK(&so->so_rcv);
624                 so->so_rcv.sb_flags &= ~SB_UPCALL;
625                 SOCKBUF_UNLOCK(&so->so_rcv);
626                 so->so_upcall = NULL;
627                 so->so_upcallarg = NULL;
628                 soshutdown(so, SHUT_RDWR);
629                 closef(fp, NULL);
630                 NFSD_LOCK();
631                 if (slp->ns_nam)
632                         FREE(slp->ns_nam, M_SONAME);
633                 m_freem(slp->ns_raw);
634                 while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) {
635                         STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
636                         if (rec->nr_address)
637                                 FREE(rec->nr_address, M_SONAME);
638                         m_freem(rec->nr_packet);
639                         free(rec, M_NFSRVDESC);
640                 }
641                 s = splsoftclock();
642                 for (nwp = LIST_FIRST(&slp->ns_tq); nwp; nwp = nnwp) {
643                         nnwp = LIST_NEXT(nwp, nd_tq);
644                         LIST_REMOVE(nwp, nd_tq);
645                         if (nwp->nd_cr != NULL)
646                                 crfree(nwp->nd_cr);
647                         free((caddr_t)nwp, M_NFSRVDESC);
648                 }
649                 LIST_INIT(&slp->ns_tq);
650                 splx(s);
651         }
652 }
653
654 /*
655  * Derefence a server socket structure. If it has no more references and
656  * is no longer valid, you can throw it away.
657  */
658 void
659 nfsrv_slpderef(struct nfssvc_sock *slp)
660 {
661
662         NFSD_LOCK_ASSERT();
663
664         if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
665                 TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
666                 free((caddr_t)slp, M_NFSSVC);
667         }
668 }
669
670 /*
671  * Lock a socket against others.
672  *
673  * XXXRW: Wait argument is always 1 in the caller.  Replace with a real
674  * sleep lock?
675  */
676 int
677 nfs_slplock(struct nfssvc_sock *slp, int wait)
678 {
679         int *statep = &slp->ns_solock;
680
681         NFSD_LOCK_ASSERT();
682
683         if (!wait && (*statep & NFSRV_SNDLOCK))
684                 return(0);      /* already locked, fail */
685         while (*statep & NFSRV_SNDLOCK) {
686                 *statep |= NFSRV_WANTSND;
687                 (void) msleep(statep, &nfsd_mtx, PZERO - 1, "nfsslplck", 0);
688         }
689         *statep |= NFSRV_SNDLOCK;
690         return (1);
691 }
692
693 /*
694  * Unlock the stream socket for others.
695  */
696 void
697 nfs_slpunlock(struct nfssvc_sock *slp)
698 {
699         int *statep = &slp->ns_solock;
700
701         NFSD_LOCK_ASSERT();
702
703         if ((*statep & NFSRV_SNDLOCK) == 0)
704                 panic("nfs slpunlock");
705         *statep &= ~NFSRV_SNDLOCK;
706         if (*statep & NFSRV_WANTSND) {
707                 *statep &= ~NFSRV_WANTSND;
708                 wakeup(statep);
709         }
710 }
711
712 /*
713  * Initialize the data structures for the server.
714  * Handshake with any new nfsds starting up to avoid any chance of
715  * corruption.
716  */
717 void
718 nfsrv_init(int terminating)
719 {
720         struct nfssvc_sock *slp, *nslp;
721
722         NET_ASSERT_GIANT();
723         NFSD_LOCK_ASSERT();
724
725         if (nfssvc_sockhead_flag & SLP_INIT)
726                 panic("nfsd init");
727         nfssvc_sockhead_flag |= SLP_INIT;
728         if (terminating) {
729                 TAILQ_FOREACH_SAFE(slp, &nfssvc_sockhead, ns_chain, nslp) {
730                         if (slp->ns_flag & SLP_VALID)
731                                 nfsrv_zapsock(slp);
732                         TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
733                         free((caddr_t)slp, M_NFSSVC);
734                 }
735                 nfsrv_cleancache();     /* And clear out server cache */
736         } else
737                 nfs_pub.np_valid = 0;
738
739         TAILQ_INIT(&nfssvc_sockhead);
740         nfssvc_sockhead_flag &= ~SLP_INIT;
741         if (nfssvc_sockhead_flag & SLP_WANTINIT) {
742                 nfssvc_sockhead_flag &= ~SLP_WANTINIT;
743                 wakeup(&nfssvc_sockhead);
744         }
745
746         TAILQ_INIT(&nfsd_head);
747         nfsd_head_flag &= ~NFSD_CHECKSLP;
748
749 #if 0
750         nfs_udpsock = (struct nfssvc_sock *)
751             malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
752         STAILQ_INIT(&nfs_udpsock->ns_rec);
753         TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
754
755         nfs_cltpsock = (struct nfssvc_sock *)
756             malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
757         STAILQ_INIT(&nfs_cltpsock->ns_rec);
758         TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
759 #endif
760 }