]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/fs/nfsserver/nfs_nfsdstate.c
MFC: r276193
[FreeBSD/stable/10.git] / sys / fs / nfsserver / nfs_nfsdstate.c
1 /*-
2  * Copyright (c) 2009 Rick Macklem, University of Guelph
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #ifndef APPLEKEXT
32 #include <fs/nfs/nfsport.h>
33
34 struct nfsrv_stablefirst nfsrv_stablefirst;
35 int nfsrv_issuedelegs = 0;
36 int nfsrv_dolocallocks = 0;
37 struct nfsv4lock nfsv4rootfs_lock;
38
39 extern int newnfs_numnfsd;
40 extern struct nfsstats newnfsstats;
41 extern int nfsrv_lease;
42 extern struct timeval nfsboottime;
43 extern u_int32_t newnfs_true, newnfs_false;
44 NFSV4ROOTLOCKMUTEX;
45 NFSSTATESPINLOCK;
46
47 /*
48  * Hash lists for nfs V4.
49  * (Some would put them in the .h file, but I don't like declaring storage
50  *  in a .h)
51  */
52 struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
53 struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE];
54 struct nfssessionhash nfssessionhash[NFSSESSIONHASHSIZE];
55 #endif  /* !APPLEKEXT */
56
57 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
58 static time_t nfsrvboottime;
59 static int nfsrv_writedelegifpos = 1;
60 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
61 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
62 static int nfsrv_nogsscallback = 0;
63
64 /* local functions */
65 static void nfsrv_dumpaclient(struct nfsclient *clp,
66     struct nfsd_dumpclients *dumpp);
67 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
68     NFSPROC_T *p);
69 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
70     NFSPROC_T *p);
71 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
72     NFSPROC_T *p);
73 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
74     int cansleep, NFSPROC_T *p);
75 static void nfsrv_freenfslock(struct nfslock *lop);
76 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
77 static void nfsrv_freedeleg(struct nfsstate *);
78 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, 
79     u_int32_t flags, struct nfsstate **stpp);
80 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
81     struct nfsstate **stpp);
82 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
83     struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
84 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
85     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
86 static void nfsrv_insertlock(struct nfslock *new_lop,
87     struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
88 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
89     struct nfslock **other_lopp, struct nfslockfile *lfp);
90 static int nfsrv_getipnumber(u_char *cp);
91 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
92     nfsv4stateid_t *stateidp, int specialid);
93 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
94     u_int32_t flags);
95 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
96     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
97     struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
98 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
99     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
100 static u_int32_t nfsrv_nextclientindex(void);
101 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
102 static void nfsrv_markstable(struct nfsclient *clp);
103 static int nfsrv_checkstable(struct nfsclient *clp);
104 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct 
105     vnode *vp, NFSPROC_T *p);
106 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
107     NFSPROC_T *p, vnode_t vp);
108 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
109     struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
110 static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
111     struct nfsclient *clp);
112 static time_t nfsrv_leaseexpiry(void);
113 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
114 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
115     struct nfsstate *stp, struct nfsrvcache *op);
116 static int nfsrv_nootherstate(struct nfsstate *stp);
117 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
118     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
119 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
120     uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
121 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
122     int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
123     NFSPROC_T *p);
124 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
125     NFSPROC_T *p);
126 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
127     uint64_t first, uint64_t end);
128 static void nfsrv_locklf(struct nfslockfile *lfp);
129 static void nfsrv_unlocklf(struct nfslockfile *lfp);
130 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
131 static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
132 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
133     int dont_replycache, struct nfsdsession **sepp);
134 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
135
136 /*
137  * Scan the client list for a match and either return the current one,
138  * create a new entry or return an error.
139  * If returning a non-error, the clp structure must either be linked into
140  * the client list or free'd.
141  */
142 APPLESTATIC int
143 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
144     nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
145 {
146         struct nfsclient *clp = NULL, *new_clp = *new_clpp;
147         int i, error = 0;
148         struct nfsstate *stp, *tstp;
149         struct sockaddr_in *sad, *rad;
150         int zapit = 0, gotit, hasstate = 0, igotlock;
151         static u_int64_t confirm_index = 0;
152
153         /*
154          * Check for state resource limit exceeded.
155          */
156         if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
157                 error = NFSERR_RESOURCE;
158                 goto out;
159         }
160
161         if (nfsrv_issuedelegs == 0 ||
162             ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
163                 /*
164                  * Don't do callbacks when delegations are disabled or
165                  * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
166                  * If establishing a callback connection is attempted
167                  * when a firewall is blocking the callback path, the
168                  * server may wait too long for the connect attempt to
169                  * succeed during the Open. Some clients, such as Linux,
170                  * may timeout and give up on the Open before the server
171                  * replies. Also, since AUTH_GSS callbacks are not
172                  * yet interoperability tested, they might cause the
173                  * server to crap out, if they get past the Init call to
174                  * the client.
175                  */
176                 new_clp->lc_program = 0;
177
178         /* Lock out other nfsd threads */
179         NFSLOCKV4ROOTMUTEX();
180         nfsv4_relref(&nfsv4rootfs_lock);
181         do {
182                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
183                     NFSV4ROOTLOCKMUTEXPTR, NULL);
184         } while (!igotlock);
185         NFSUNLOCKV4ROOTMUTEX();
186
187         /*
188          * Search for a match in the client list.
189          */
190         gotit = i = 0;
191         while (i < NFSCLIENTHASHSIZE && !gotit) {
192             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
193                 if (new_clp->lc_idlen == clp->lc_idlen &&
194                     !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
195                         gotit = 1;
196                         break;
197                 }
198             }
199             i++;
200         }
201         if (!gotit ||
202             (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
203                 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
204                         /*
205                          * For NFSv4.1, if confirmp->lval[1] is non-zero, the
206                          * client is trying to update a confirmed clientid.
207                          */
208                         NFSLOCKV4ROOTMUTEX();
209                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
210                         NFSUNLOCKV4ROOTMUTEX();
211                         confirmp->lval[1] = 0;
212                         error = NFSERR_NOENT;
213                         goto out;
214                 }
215                 /*
216                  * Get rid of the old one.
217                  */
218                 if (i != NFSCLIENTHASHSIZE) {
219                         LIST_REMOVE(clp, lc_hash);
220                         nfsrv_cleanclient(clp, p);
221                         nfsrv_freedeleglist(&clp->lc_deleg);
222                         nfsrv_freedeleglist(&clp->lc_olddeleg);
223                         zapit = 1;
224                 }
225                 /*
226                  * Add it after assigning a client id to it.
227                  */
228                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
229                 if ((nd->nd_flag & ND_NFSV41) != 0)
230                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
231                             ++confirm_index;
232                 else
233                         confirmp->qval = new_clp->lc_confirm.qval =
234                             ++confirm_index;
235                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
236                     (u_int32_t)nfsrvboottime;
237                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
238                     nfsrv_nextclientindex();
239                 new_clp->lc_stateindex = 0;
240                 new_clp->lc_statemaxindex = 0;
241                 new_clp->lc_cbref = 0;
242                 new_clp->lc_expiry = nfsrv_leaseexpiry();
243                 LIST_INIT(&new_clp->lc_open);
244                 LIST_INIT(&new_clp->lc_deleg);
245                 LIST_INIT(&new_clp->lc_olddeleg);
246                 LIST_INIT(&new_clp->lc_session);
247                 for (i = 0; i < NFSSTATEHASHSIZE; i++)
248                         LIST_INIT(&new_clp->lc_stateid[i]);
249                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
250                     lc_hash);
251                 newnfsstats.srvclients++;
252                 nfsrv_openpluslock++;
253                 nfsrv_clients++;
254                 NFSLOCKV4ROOTMUTEX();
255                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
256                 NFSUNLOCKV4ROOTMUTEX();
257                 if (zapit)
258                         nfsrv_zapclient(clp, p);
259                 *new_clpp = NULL;
260                 goto out;
261         }
262
263         /*
264          * Now, handle the cases where the id is already issued.
265          */
266         if (nfsrv_notsamecredname(nd, clp)) {
267             /*
268              * Check to see if there is expired state that should go away.
269              */
270             if (clp->lc_expiry < NFSD_MONOSEC &&
271                 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
272                 nfsrv_cleanclient(clp, p);
273                 nfsrv_freedeleglist(&clp->lc_deleg);
274             }
275
276             /*
277              * If there is outstanding state, then reply NFSERR_CLIDINUSE per
278              * RFC3530 Sec. 8.1.2 last para.
279              */
280             if (!LIST_EMPTY(&clp->lc_deleg)) {
281                 hasstate = 1;
282             } else if (LIST_EMPTY(&clp->lc_open)) {
283                 hasstate = 0;
284             } else {
285                 hasstate = 0;
286                 /* Look for an Open on the OpenOwner */
287                 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
288                     if (!LIST_EMPTY(&stp->ls_open)) {
289                         hasstate = 1;
290                         break;
291                     }
292                 }
293             }
294             if (hasstate) {
295                 /*
296                  * If the uid doesn't match, return NFSERR_CLIDINUSE after
297                  * filling out the correct ipaddr and portnum.
298                  */
299                 sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
300                 rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
301                 sad->sin_addr.s_addr = rad->sin_addr.s_addr;
302                 sad->sin_port = rad->sin_port;
303                 NFSLOCKV4ROOTMUTEX();
304                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
305                 NFSUNLOCKV4ROOTMUTEX();
306                 error = NFSERR_CLIDINUSE;
307                 goto out;
308             }
309         }
310
311         if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
312                 /*
313                  * If the verifier has changed, the client has rebooted
314                  * and a new client id is issued. The old state info
315                  * can be thrown away once the SETCLIENTID_CONFIRM occurs.
316                  */
317                 LIST_REMOVE(clp, lc_hash);
318                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
319                 if ((nd->nd_flag & ND_NFSV41) != 0)
320                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
321                             ++confirm_index;
322                 else
323                         confirmp->qval = new_clp->lc_confirm.qval =
324                             ++confirm_index;
325                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
326                     nfsrvboottime;
327                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
328                     nfsrv_nextclientindex();
329                 new_clp->lc_stateindex = 0;
330                 new_clp->lc_statemaxindex = 0;
331                 new_clp->lc_cbref = 0;
332                 new_clp->lc_expiry = nfsrv_leaseexpiry();
333
334                 /*
335                  * Save the state until confirmed.
336                  */
337                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
338                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
339                         tstp->ls_clp = new_clp;
340                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
341                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
342                         tstp->ls_clp = new_clp;
343                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
344                     ls_list);
345                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
346                         tstp->ls_clp = new_clp;
347                 for (i = 0; i < NFSSTATEHASHSIZE; i++) {
348                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
349                             &clp->lc_stateid[i], ls_hash);
350                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
351                                 tstp->ls_clp = new_clp;
352                 }
353                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
354                     lc_hash);
355                 newnfsstats.srvclients++;
356                 nfsrv_openpluslock++;
357                 nfsrv_clients++;
358                 NFSLOCKV4ROOTMUTEX();
359                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
360                 NFSUNLOCKV4ROOTMUTEX();
361
362                 /*
363                  * Must wait until any outstanding callback on the old clp
364                  * completes.
365                  */
366                 NFSLOCKSTATE();
367                 while (clp->lc_cbref) {
368                         clp->lc_flags |= LCL_WAKEUPWANTED;
369                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
370                             "nfsd clp", 10 * hz);
371                 }
372                 NFSUNLOCKSTATE();
373                 nfsrv_zapclient(clp, p);
374                 *new_clpp = NULL;
375                 goto out;
376         }
377
378         /* For NFSv4.1, mark that we found a confirmed clientid. */
379         if ((nd->nd_flag & ND_NFSV41) != 0)
380                 confirmp->lval[1] = 1;
381         else {
382                 /*
383                  * id and verifier match, so update the net address info
384                  * and get rid of any existing callback authentication
385                  * handle, so a new one will be acquired.
386                  */
387                 LIST_REMOVE(clp, lc_hash);
388                 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
389                 new_clp->lc_expiry = nfsrv_leaseexpiry();
390                 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
391                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
392                     clp->lc_clientid.lval[0];
393                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
394                     clp->lc_clientid.lval[1];
395                 new_clp->lc_delegtime = clp->lc_delegtime;
396                 new_clp->lc_stateindex = clp->lc_stateindex;
397                 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
398                 new_clp->lc_cbref = 0;
399                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
400                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
401                         tstp->ls_clp = new_clp;
402                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
403                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
404                         tstp->ls_clp = new_clp;
405                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
406                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
407                         tstp->ls_clp = new_clp;
408                 for (i = 0; i < NFSSTATEHASHSIZE; i++) {
409                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
410                             &clp->lc_stateid[i], ls_hash);
411                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
412                                 tstp->ls_clp = new_clp;
413                 }
414                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
415                     lc_hash);
416                 newnfsstats.srvclients++;
417                 nfsrv_openpluslock++;
418                 nfsrv_clients++;
419         }
420         NFSLOCKV4ROOTMUTEX();
421         nfsv4_unlock(&nfsv4rootfs_lock, 1);
422         NFSUNLOCKV4ROOTMUTEX();
423
424         if ((nd->nd_flag & ND_NFSV41) == 0) {
425                 /*
426                  * Must wait until any outstanding callback on the old clp
427                  * completes.
428                  */
429                 NFSLOCKSTATE();
430                 while (clp->lc_cbref) {
431                         clp->lc_flags |= LCL_WAKEUPWANTED;
432                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
433                             "nfsdclp", 10 * hz);
434                 }
435                 NFSUNLOCKSTATE();
436                 nfsrv_zapclient(clp, p);
437                 *new_clpp = NULL;
438         }
439
440 out:
441         NFSEXITCODE2(error, nd);
442         return (error);
443 }
444
445 /*
446  * Check to see if the client id exists and optionally confirm it.
447  */
448 APPLESTATIC int
449 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
450     struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
451     struct nfsrv_descript *nd, NFSPROC_T *p)
452 {
453         struct nfsclient *clp;
454         struct nfsstate *stp;
455         int i;
456         struct nfsclienthashhead *hp;
457         int error = 0, igotlock, doneok;
458         struct nfssessionhash *shp;
459         struct nfsdsession *sep;
460         uint64_t sessid[2];
461         static uint64_t next_sess = 0;
462
463         if (clpp)
464                 *clpp = NULL;
465         if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
466             opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
467                 error = NFSERR_STALECLIENTID;
468                 goto out;
469         }
470
471         /*
472          * If called with opflags == CLOPS_RENEW, the State Lock is
473          * already held. Otherwise, we need to get either that or,
474          * for the case of Confirm, lock out the nfsd threads.
475          */
476         if (opflags & CLOPS_CONFIRM) {
477                 NFSLOCKV4ROOTMUTEX();
478                 nfsv4_relref(&nfsv4rootfs_lock);
479                 do {
480                         igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
481                             NFSV4ROOTLOCKMUTEXPTR, NULL);
482                 } while (!igotlock);
483                 /*
484                  * Create a new sessionid here, since we need to do it where
485                  * there is a mutex held to serialize update of next_sess.
486                  */
487                 if ((nd->nd_flag & ND_NFSV41) != 0) {
488                         sessid[0] = ++next_sess;
489                         sessid[1] = clientid.qval;
490                 }
491                 NFSUNLOCKV4ROOTMUTEX();
492         } else if (opflags != CLOPS_RENEW) {
493                 NFSLOCKSTATE();
494         }
495
496         /* For NFSv4.1, the clp is acquired from the associated session. */
497         if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
498             opflags == CLOPS_RENEW) {
499                 clp = NULL;
500                 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
501                         shp = NFSSESSIONHASH(nd->nd_sessionid);
502                         NFSLOCKSESSION(shp);
503                         sep = nfsrv_findsession(nd->nd_sessionid);
504                         if (sep != NULL)
505                                 clp = sep->sess_clp;
506                         NFSUNLOCKSESSION(shp);
507                 }
508         } else {
509                 hp = NFSCLIENTHASH(clientid);
510                 LIST_FOREACH(clp, hp, lc_hash) {
511                         if (clp->lc_clientid.lval[1] == clientid.lval[1])
512                                 break;
513                 }
514         }
515         if (clp == NULL) {
516                 if (opflags & CLOPS_CONFIRM)
517                         error = NFSERR_STALECLIENTID;
518                 else
519                         error = NFSERR_EXPIRED;
520         } else if (clp->lc_flags & LCL_ADMINREVOKED) {
521                 /*
522                  * If marked admin revoked, just return the error.
523                  */
524                 error = NFSERR_ADMINREVOKED;
525         }
526         if (error) {
527                 if (opflags & CLOPS_CONFIRM) {
528                         NFSLOCKV4ROOTMUTEX();
529                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
530                         NFSUNLOCKV4ROOTMUTEX();
531                 } else if (opflags != CLOPS_RENEW) {
532                         NFSUNLOCKSTATE();
533                 }
534                 goto out;
535         }
536
537         /*
538          * Perform any operations specified by the opflags.
539          */
540         if (opflags & CLOPS_CONFIRM) {
541                 if (((nd->nd_flag & ND_NFSV41) != 0 &&
542                      clp->lc_confirm.lval[0] != confirm.lval[0]) ||
543                     ((nd->nd_flag & ND_NFSV41) == 0 &&
544                      clp->lc_confirm.qval != confirm.qval))
545                         error = NFSERR_STALECLIENTID;
546                 else if (nfsrv_notsamecredname(nd, clp))
547                         error = NFSERR_CLIDINUSE;
548
549                 if (!error) {
550                     if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
551                         LCL_NEEDSCONFIRM) {
552                         /*
553                          * Hang onto the delegations (as old delegations)
554                          * for an Open with CLAIM_DELEGATE_PREV unless in
555                          * grace, but get rid of the rest of the state.
556                          */
557                         nfsrv_cleanclient(clp, p);
558                         nfsrv_freedeleglist(&clp->lc_olddeleg);
559                         if (nfsrv_checkgrace(nd, clp, 0)) {
560                             /* In grace, so just delete delegations */
561                             nfsrv_freedeleglist(&clp->lc_deleg);
562                         } else {
563                             LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
564                                 stp->ls_flags |= NFSLCK_OLDDELEG;
565                             clp->lc_delegtime = NFSD_MONOSEC +
566                                 nfsrv_lease + NFSRV_LEASEDELTA;
567                             LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
568                                 ls_list);
569                         }
570                         if ((nd->nd_flag & ND_NFSV41) != 0)
571                             clp->lc_program = cbprogram;
572                     }
573                     clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
574                     if (clp->lc_program)
575                         clp->lc_flags |= LCL_NEEDSCBNULL;
576                     /* For NFSv4.1, link the session onto the client. */
577                     if (nsep != NULL) {
578                         /* Hold a reference on the xprt for a backchannel. */
579                         if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
580                             != 0 && clp->lc_req.nr_client == NULL) {
581                             clp->lc_req.nr_client = (struct __rpc_client *)
582                                 clnt_bck_create(nd->nd_xprt->xp_socket,
583                                 cbprogram, NFSV4_CBVERS);
584                             if (clp->lc_req.nr_client != NULL) {
585                                 SVC_ACQUIRE(nd->nd_xprt);
586                                 nd->nd_xprt->xp_p2 =
587                                     clp->lc_req.nr_client->cl_private;
588                                 /* Disable idle timeout. */
589                                 nd->nd_xprt->xp_idletimeout = 0;
590                                 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
591                             } else
592                                 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
593                         }
594                         NFSBCOPY(sessid, nsep->sess_sessionid,
595                             NFSX_V4SESSIONID);
596                         NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
597                             NFSX_V4SESSIONID);
598                         shp = NFSSESSIONHASH(nsep->sess_sessionid);
599                         NFSLOCKSESSION(shp);
600                         LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
601                         NFSLOCKSTATE();
602                         LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
603                         nsep->sess_clp = clp;
604                         NFSUNLOCKSTATE();
605                         NFSUNLOCKSESSION(shp);
606                     }
607                 }
608         } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
609                 error = NFSERR_EXPIRED;
610         }
611
612         /*
613          * If called by the Renew Op, we must check the principal.
614          */
615         if (!error && (opflags & CLOPS_RENEWOP)) {
616             if (nfsrv_notsamecredname(nd, clp)) {
617                 doneok = 0;
618                 for (i = 0; i < NFSSTATEHASHSIZE && doneok == 0; i++) {
619                     LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
620                         if ((stp->ls_flags & NFSLCK_OPEN) &&
621                             stp->ls_uid == nd->nd_cred->cr_uid) {
622                                 doneok = 1;
623                                 break;
624                         }
625                     }
626                 }
627                 if (!doneok)
628                         error = NFSERR_ACCES;
629             }
630             if (!error && (clp->lc_flags & LCL_CBDOWN))
631                 error = NFSERR_CBPATHDOWN;
632         }
633         if ((!error || error == NFSERR_CBPATHDOWN) &&
634              (opflags & CLOPS_RENEW)) {
635                 clp->lc_expiry = nfsrv_leaseexpiry();
636         }
637         if (opflags & CLOPS_CONFIRM) {
638                 NFSLOCKV4ROOTMUTEX();
639                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
640                 NFSUNLOCKV4ROOTMUTEX();
641         } else if (opflags != CLOPS_RENEW) {
642                 NFSUNLOCKSTATE();
643         }
644         if (clpp)
645                 *clpp = clp;
646
647 out:
648         NFSEXITCODE2(error, nd);
649         return (error);
650 }
651
652 /*
653  * Perform the NFSv4.1 destroy clientid.
654  */
655 int
656 nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
657 {
658         struct nfsclient *clp;
659         struct nfsclienthashhead *hp;
660         int error = 0, i, igotlock;
661
662         if (nfsrvboottime != clientid.lval[0]) {
663                 error = NFSERR_STALECLIENTID;
664                 goto out;
665         }
666
667         /* Lock out other nfsd threads */
668         NFSLOCKV4ROOTMUTEX();
669         nfsv4_relref(&nfsv4rootfs_lock);
670         do {
671                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
672                     NFSV4ROOTLOCKMUTEXPTR, NULL);
673         } while (igotlock == 0);
674         NFSUNLOCKV4ROOTMUTEX();
675
676         hp = NFSCLIENTHASH(clientid);
677         LIST_FOREACH(clp, hp, lc_hash) {
678                 if (clp->lc_clientid.lval[1] == clientid.lval[1])
679                         break;
680         }
681         if (clp == NULL) {
682                 NFSLOCKV4ROOTMUTEX();
683                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
684                 NFSUNLOCKV4ROOTMUTEX();
685                 /* Just return ok, since it is gone. */
686                 goto out;
687         }
688
689         /* Scan for state on the clientid. */
690         for (i = 0; i < NFSSTATEHASHSIZE; i++)
691                 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
692                         NFSLOCKV4ROOTMUTEX();
693                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
694                         NFSUNLOCKV4ROOTMUTEX();
695                         error = NFSERR_CLIENTIDBUSY;
696                         goto out;
697                 }
698         if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
699                 NFSLOCKV4ROOTMUTEX();
700                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
701                 NFSUNLOCKV4ROOTMUTEX();
702                 error = NFSERR_CLIENTIDBUSY;
703                 goto out;
704         }
705
706         /* Destroy the clientid and return ok. */
707         nfsrv_cleanclient(clp, p);
708         nfsrv_freedeleglist(&clp->lc_deleg);
709         nfsrv_freedeleglist(&clp->lc_olddeleg);
710         LIST_REMOVE(clp, lc_hash);
711         NFSLOCKV4ROOTMUTEX();
712         nfsv4_unlock(&nfsv4rootfs_lock, 1);
713         NFSUNLOCKV4ROOTMUTEX();
714         nfsrv_zapclient(clp, p);
715 out:
716         NFSEXITCODE2(error, nd);
717         return (error);
718 }
719
720 /*
721  * Called from the new nfssvc syscall to admin revoke a clientid.
722  * Returns 0 for success, error otherwise.
723  */
724 APPLESTATIC int
725 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
726 {
727         struct nfsclient *clp = NULL;
728         int i, error = 0;
729         int gotit, igotlock;
730
731         /*
732          * First, lock out the nfsd so that state won't change while the
733          * revocation record is being written to the stable storage restart
734          * file.
735          */
736         NFSLOCKV4ROOTMUTEX();
737         do {
738                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
739                     NFSV4ROOTLOCKMUTEXPTR, NULL);
740         } while (!igotlock);
741         NFSUNLOCKV4ROOTMUTEX();
742
743         /*
744          * Search for a match in the client list.
745          */
746         gotit = i = 0;
747         while (i < NFSCLIENTHASHSIZE && !gotit) {
748             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
749                 if (revokep->nclid_idlen == clp->lc_idlen &&
750                     !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
751                         gotit = 1;
752                         break;
753                 }
754             }
755             i++;
756         }
757         if (!gotit) {
758                 NFSLOCKV4ROOTMUTEX();
759                 nfsv4_unlock(&nfsv4rootfs_lock, 0);
760                 NFSUNLOCKV4ROOTMUTEX();
761                 error = EPERM;
762                 goto out;
763         }
764
765         /*
766          * Now, write out the revocation record
767          */
768         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
769         nfsrv_backupstable();
770
771         /*
772          * and clear out the state, marking the clientid revoked.
773          */
774         clp->lc_flags &= ~LCL_CALLBACKSON;
775         clp->lc_flags |= LCL_ADMINREVOKED;
776         nfsrv_cleanclient(clp, p);
777         nfsrv_freedeleglist(&clp->lc_deleg);
778         nfsrv_freedeleglist(&clp->lc_olddeleg);
779         NFSLOCKV4ROOTMUTEX();
780         nfsv4_unlock(&nfsv4rootfs_lock, 0);
781         NFSUNLOCKV4ROOTMUTEX();
782
783 out:
784         NFSEXITCODE(error);
785         return (error);
786 }
787
788 /*
789  * Dump out stats for all clients. Called from nfssvc(2), that is used
790  * newnfsstats.
791  */
792 APPLESTATIC void
793 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
794 {
795         struct nfsclient *clp;
796         int i = 0, cnt = 0;
797
798         /*
799          * First, get a reference on the nfsv4rootfs_lock so that an
800          * exclusive lock cannot be acquired while dumping the clients.
801          */
802         NFSLOCKV4ROOTMUTEX();
803         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
804         NFSUNLOCKV4ROOTMUTEX();
805         NFSLOCKSTATE();
806         /*
807          * Rattle through the client lists until done.
808          */
809         while (i < NFSCLIENTHASHSIZE && cnt < maxcnt) {
810             clp = LIST_FIRST(&nfsclienthash[i]);
811             while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
812                 nfsrv_dumpaclient(clp, &dumpp[cnt]);
813                 cnt++;
814                 clp = LIST_NEXT(clp, lc_hash);
815             }
816             i++;
817         }
818         if (cnt < maxcnt)
819             dumpp[cnt].ndcl_clid.nclid_idlen = 0;
820         NFSUNLOCKSTATE();
821         NFSLOCKV4ROOTMUTEX();
822         nfsv4_relref(&nfsv4rootfs_lock);
823         NFSUNLOCKV4ROOTMUTEX();
824 }
825
826 /*
827  * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
828  */
829 static void
830 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
831 {
832         struct nfsstate *stp, *openstp, *lckownstp;
833         struct nfslock *lop;
834         struct sockaddr *sad;
835         struct sockaddr_in *rad;
836         struct sockaddr_in6 *rad6;
837
838         dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
839         dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
840         dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
841         dumpp->ndcl_flags = clp->lc_flags;
842         dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
843         NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
844         sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
845         dumpp->ndcl_addrfam = sad->sa_family;
846         if (sad->sa_family == AF_INET) {
847                 rad = (struct sockaddr_in *)sad;
848                 dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
849         } else {
850                 rad6 = (struct sockaddr_in6 *)sad;
851                 dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
852         }
853
854         /*
855          * Now, scan the state lists and total up the opens and locks.
856          */
857         LIST_FOREACH(stp, &clp->lc_open, ls_list) {
858             dumpp->ndcl_nopenowners++;
859             LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
860                 dumpp->ndcl_nopens++;
861                 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
862                     dumpp->ndcl_nlockowners++;
863                     LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
864                         dumpp->ndcl_nlocks++;
865                     }
866                 }
867             }
868         }
869
870         /*
871          * and the delegation lists.
872          */
873         LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
874             dumpp->ndcl_ndelegs++;
875         }
876         LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
877             dumpp->ndcl_nolddelegs++;
878         }
879 }
880
881 /*
882  * Dump out lock stats for a file.
883  */
884 APPLESTATIC void
885 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
886     NFSPROC_T *p)
887 {
888         struct nfsstate *stp;
889         struct nfslock *lop;
890         int cnt = 0;
891         struct nfslockfile *lfp;
892         struct sockaddr *sad;
893         struct sockaddr_in *rad;
894         struct sockaddr_in6 *rad6;
895         int ret;
896         fhandle_t nfh;
897
898         ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
899         /*
900          * First, get a reference on the nfsv4rootfs_lock so that an
901          * exclusive lock on it cannot be acquired while dumping the locks.
902          */
903         NFSLOCKV4ROOTMUTEX();
904         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
905         NFSUNLOCKV4ROOTMUTEX();
906         NFSLOCKSTATE();
907         if (!ret)
908                 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
909         if (ret) {
910                 ldumpp[0].ndlck_clid.nclid_idlen = 0;
911                 NFSUNLOCKSTATE();
912                 NFSLOCKV4ROOTMUTEX();
913                 nfsv4_relref(&nfsv4rootfs_lock);
914                 NFSUNLOCKV4ROOTMUTEX();
915                 return;
916         }
917
918         /*
919          * For each open share on file, dump it out.
920          */
921         stp = LIST_FIRST(&lfp->lf_open);
922         while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
923                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
924                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
925                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
926                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
927                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
928                 ldumpp[cnt].ndlck_owner.nclid_idlen =
929                     stp->ls_openowner->ls_ownerlen;
930                 NFSBCOPY(stp->ls_openowner->ls_owner,
931                     ldumpp[cnt].ndlck_owner.nclid_id,
932                     stp->ls_openowner->ls_ownerlen);
933                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
934                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
935                     stp->ls_clp->lc_idlen);
936                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
937                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
938                 if (sad->sa_family == AF_INET) {
939                         rad = (struct sockaddr_in *)sad;
940                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
941                 } else {
942                         rad6 = (struct sockaddr_in6 *)sad;
943                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
944                 }
945                 stp = LIST_NEXT(stp, ls_file);
946                 cnt++;
947         }
948
949         /*
950          * and all locks.
951          */
952         lop = LIST_FIRST(&lfp->lf_lock);
953         while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
954                 stp = lop->lo_stp;
955                 ldumpp[cnt].ndlck_flags = lop->lo_flags;
956                 ldumpp[cnt].ndlck_first = lop->lo_first;
957                 ldumpp[cnt].ndlck_end = lop->lo_end;
958                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
959                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
960                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
961                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
962                 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
963                 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
964                     stp->ls_ownerlen);
965                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
966                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
967                     stp->ls_clp->lc_idlen);
968                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
969                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
970                 if (sad->sa_family == AF_INET) {
971                         rad = (struct sockaddr_in *)sad;
972                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
973                 } else {
974                         rad6 = (struct sockaddr_in6 *)sad;
975                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
976                 }
977                 lop = LIST_NEXT(lop, lo_lckfile);
978                 cnt++;
979         }
980
981         /*
982          * and the delegations.
983          */
984         stp = LIST_FIRST(&lfp->lf_deleg);
985         while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
986                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
987                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
988                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
989                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
990                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
991                 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
992                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
993                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
994                     stp->ls_clp->lc_idlen);
995                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
996                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
997                 if (sad->sa_family == AF_INET) {
998                         rad = (struct sockaddr_in *)sad;
999                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1000                 } else {
1001                         rad6 = (struct sockaddr_in6 *)sad;
1002                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1003                 }
1004                 stp = LIST_NEXT(stp, ls_file);
1005                 cnt++;
1006         }
1007
1008         /*
1009          * If list isn't full, mark end of list by setting the client name
1010          * to zero length.
1011          */
1012         if (cnt < maxcnt)
1013                 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1014         NFSUNLOCKSTATE();
1015         NFSLOCKV4ROOTMUTEX();
1016         nfsv4_relref(&nfsv4rootfs_lock);
1017         NFSUNLOCKV4ROOTMUTEX();
1018 }
1019
1020 /*
1021  * Server timer routine. It can scan any linked list, so long
1022  * as it holds the spin/mutex lock and there is no exclusive lock on
1023  * nfsv4rootfs_lock.
1024  * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1025  *  to do this from a callout, since the spin locks work. For
1026  *  Darwin, I'm not sure what will work correctly yet.)
1027  * Should be called once per second.
1028  */
1029 APPLESTATIC void
1030 nfsrv_servertimer(void)
1031 {
1032         struct nfsclient *clp, *nclp;
1033         struct nfsstate *stp, *nstp;
1034         int got_ref, i;
1035
1036         /*
1037          * Make sure nfsboottime is set. This is used by V3 as well
1038          * as V4. Note that nfsboottime is not nfsrvboottime, which is
1039          * only used by the V4 server for leases.
1040          */
1041         if (nfsboottime.tv_sec == 0)
1042                 NFSSETBOOTTIME(nfsboottime);
1043
1044         /*
1045          * If server hasn't started yet, just return.
1046          */
1047         NFSLOCKSTATE();
1048         if (nfsrv_stablefirst.nsf_eograce == 0) {
1049                 NFSUNLOCKSTATE();
1050                 return;
1051         }
1052         if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1053                 if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1054                     NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1055                         nfsrv_stablefirst.nsf_flags |=
1056                             (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1057                 NFSUNLOCKSTATE();
1058                 return;
1059         }
1060
1061         /*
1062          * Try and get a reference count on the nfsv4rootfs_lock so that
1063          * no nfsd thread can acquire an exclusive lock on it before this
1064          * call is done. If it is already exclusively locked, just return.
1065          */
1066         NFSLOCKV4ROOTMUTEX();
1067         got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1068         NFSUNLOCKV4ROOTMUTEX();
1069         if (got_ref == 0) {
1070                 NFSUNLOCKSTATE();
1071                 return;
1072         }
1073
1074         /*
1075          * For each client...
1076          */
1077         for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
1078             clp = LIST_FIRST(&nfsclienthash[i]);
1079             while (clp != LIST_END(&nfsclienthash[i])) {
1080                 nclp = LIST_NEXT(clp, lc_hash);
1081                 if (!(clp->lc_flags & LCL_EXPIREIT)) {
1082                     if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1083                          && ((LIST_EMPTY(&clp->lc_deleg)
1084                               && LIST_EMPTY(&clp->lc_open)) ||
1085                              nfsrv_clients > nfsrv_clienthighwater)) ||
1086                         (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1087                         (clp->lc_expiry < NFSD_MONOSEC &&
1088                          (nfsrv_openpluslock * 10 / 9) > NFSRV_V4STATELIMIT)) {
1089                         /*
1090                          * Lease has expired several nfsrv_lease times ago:
1091                          * PLUS
1092                          *    - no state is associated with it
1093                          *    OR
1094                          *    - above high water mark for number of clients
1095                          *      (nfsrv_clienthighwater should be large enough
1096                          *       that this only occurs when clients fail to
1097                          *       use the same nfs_client_id4.id. Maybe somewhat
1098                          *       higher that the maximum number of clients that
1099                          *       will mount this server?)
1100                          * OR
1101                          * Lease has expired a very long time ago
1102                          * OR
1103                          * Lease has expired PLUS the number of opens + locks
1104                          * has exceeded 90% of capacity
1105                          *
1106                          * --> Mark for expiry. The actual expiry will be done
1107                          *     by an nfsd sometime soon.
1108                          */
1109                         clp->lc_flags |= LCL_EXPIREIT;
1110                         nfsrv_stablefirst.nsf_flags |=
1111                             (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1112                     } else {
1113                         /*
1114                          * If there are no opens, increment no open tick cnt
1115                          * If time exceeds NFSNOOPEN, mark it to be thrown away
1116                          * otherwise, if there is an open, reset no open time
1117                          * Hopefully, this will avoid excessive re-creation
1118                          * of open owners and subsequent open confirms.
1119                          */
1120                         stp = LIST_FIRST(&clp->lc_open);
1121                         while (stp != LIST_END(&clp->lc_open)) {
1122                                 nstp = LIST_NEXT(stp, ls_list);
1123                                 if (LIST_EMPTY(&stp->ls_open)) {
1124                                         stp->ls_noopens++;
1125                                         if (stp->ls_noopens > NFSNOOPEN ||
1126                                             (nfsrv_openpluslock * 2) >
1127                                             NFSRV_V4STATELIMIT)
1128                                                 nfsrv_stablefirst.nsf_flags |=
1129                                                         NFSNSF_NOOPENS;
1130                                 } else {
1131                                         stp->ls_noopens = 0;
1132                                 }
1133                                 stp = nstp;
1134                         }
1135                     }
1136                 }
1137                 clp = nclp;
1138             }
1139         }
1140         NFSUNLOCKSTATE();
1141         NFSLOCKV4ROOTMUTEX();
1142         nfsv4_relref(&nfsv4rootfs_lock);
1143         NFSUNLOCKV4ROOTMUTEX();
1144 }
1145
1146 /*
1147  * The following set of functions free up the various data structures.
1148  */
1149 /*
1150  * Clear out all open/lock state related to this nfsclient.
1151  * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1152  * there are no other active nfsd threads.
1153  */
1154 APPLESTATIC void
1155 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1156 {
1157         struct nfsstate *stp, *nstp;
1158         struct nfsdsession *sep, *nsep;
1159
1160         LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1161                 nfsrv_freeopenowner(stp, 1, p);
1162         if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1163                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1164                         (void)nfsrv_freesession(sep, NULL);
1165 }
1166
1167 /*
1168  * Free a client that has been cleaned. It should also already have been
1169  * removed from the lists.
1170  * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1171  *  softclock interrupts are enabled.)
1172  */
1173 APPLESTATIC void
1174 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1175 {
1176
1177 #ifdef notyet
1178         if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1179              (LCL_GSS | LCL_CALLBACKSON) &&
1180             (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1181             clp->lc_handlelen > 0) {
1182                 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1183                 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1184                 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1185                         NULL, 0, NULL, NULL, NULL, p);
1186         }
1187 #endif
1188         newnfs_disconnect(&clp->lc_req);
1189         NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1190         NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1191         free((caddr_t)clp, M_NFSDCLIENT);
1192         NFSLOCKSTATE();
1193         newnfsstats.srvclients--;
1194         nfsrv_openpluslock--;
1195         nfsrv_clients--;
1196         NFSUNLOCKSTATE();
1197 }
1198
1199 /*
1200  * Free a list of delegation state structures.
1201  * (This function will also free all nfslockfile structures that no
1202  *  longer have associated state.)
1203  */
1204 APPLESTATIC void
1205 nfsrv_freedeleglist(struct nfsstatehead *sthp)
1206 {
1207         struct nfsstate *stp, *nstp;
1208
1209         LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1210                 nfsrv_freedeleg(stp);
1211         }
1212         LIST_INIT(sthp);
1213 }
1214
1215 /*
1216  * Free up a delegation.
1217  */
1218 static void
1219 nfsrv_freedeleg(struct nfsstate *stp)
1220 {
1221         struct nfslockfile *lfp;
1222
1223         LIST_REMOVE(stp, ls_hash);
1224         LIST_REMOVE(stp, ls_list);
1225         LIST_REMOVE(stp, ls_file);
1226         lfp = stp->ls_lfp;
1227         if (LIST_EMPTY(&lfp->lf_open) &&
1228             LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1229             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1230             lfp->lf_usecount == 0 &&
1231             nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1232                 nfsrv_freenfslockfile(lfp);
1233         FREE((caddr_t)stp, M_NFSDSTATE);
1234         newnfsstats.srvdelegates--;
1235         nfsrv_openpluslock--;
1236         nfsrv_delegatecnt--;
1237 }
1238
1239 /*
1240  * This function frees an open owner and all associated opens.
1241  */
1242 static void
1243 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1244 {
1245         struct nfsstate *nstp, *tstp;
1246
1247         LIST_REMOVE(stp, ls_list);
1248         /*
1249          * Now, free all associated opens.
1250          */
1251         nstp = LIST_FIRST(&stp->ls_open);
1252         while (nstp != LIST_END(&stp->ls_open)) {
1253                 tstp = nstp;
1254                 nstp = LIST_NEXT(nstp, ls_list);
1255                 (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1256         }
1257         if (stp->ls_op)
1258                 nfsrvd_derefcache(stp->ls_op);
1259         FREE((caddr_t)stp, M_NFSDSTATE);
1260         newnfsstats.srvopenowners--;
1261         nfsrv_openpluslock--;
1262 }
1263
1264 /*
1265  * This function frees an open (nfsstate open structure) with all associated
1266  * lock_owners and locks. It also frees the nfslockfile structure iff there
1267  * are no other opens on the file.
1268  * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1269  */
1270 static int
1271 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1272 {
1273         struct nfsstate *nstp, *tstp;
1274         struct nfslockfile *lfp;
1275         int ret;
1276
1277         LIST_REMOVE(stp, ls_hash);
1278         LIST_REMOVE(stp, ls_list);
1279         LIST_REMOVE(stp, ls_file);
1280
1281         lfp = stp->ls_lfp;
1282         /*
1283          * Now, free all lockowners associated with this open.
1284          */
1285         LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1286                 nfsrv_freelockowner(tstp, vp, cansleep, p);
1287
1288         /*
1289          * The nfslockfile is freed here if there are no locks
1290          * associated with the open.
1291          * If there are locks associated with the open, the
1292          * nfslockfile structure can be freed via nfsrv_freelockowner().
1293          * Acquire the state mutex to avoid races with calls to
1294          * nfsrv_getlockfile().
1295          */
1296         if (cansleep != 0)
1297                 NFSLOCKSTATE();
1298         if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1299             LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1300             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1301             lfp->lf_usecount == 0 &&
1302             (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1303                 nfsrv_freenfslockfile(lfp);
1304                 ret = 1;
1305         } else
1306                 ret = 0;
1307         if (cansleep != 0)
1308                 NFSUNLOCKSTATE();
1309         FREE((caddr_t)stp, M_NFSDSTATE);
1310         newnfsstats.srvopens--;
1311         nfsrv_openpluslock--;
1312         return (ret);
1313 }
1314
1315 /*
1316  * Frees a lockowner and all associated locks.
1317  */
1318 static void
1319 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1320     NFSPROC_T *p)
1321 {
1322
1323         LIST_REMOVE(stp, ls_hash);
1324         LIST_REMOVE(stp, ls_list);
1325         nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1326         if (stp->ls_op)
1327                 nfsrvd_derefcache(stp->ls_op);
1328         FREE((caddr_t)stp, M_NFSDSTATE);
1329         newnfsstats.srvlockowners--;
1330         nfsrv_openpluslock--;
1331 }
1332
1333 /*
1334  * Free all the nfs locks on a lockowner.
1335  */
1336 static void
1337 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1338     NFSPROC_T *p)
1339 {
1340         struct nfslock *lop, *nlop;
1341         struct nfsrollback *rlp, *nrlp;
1342         struct nfslockfile *lfp = NULL;
1343         int gottvp = 0;
1344         vnode_t tvp = NULL;
1345         uint64_t first, end;
1346
1347         if (vp != NULL)
1348                 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1349         lop = LIST_FIRST(&stp->ls_lock);
1350         while (lop != LIST_END(&stp->ls_lock)) {
1351                 nlop = LIST_NEXT(lop, lo_lckowner);
1352                 /*
1353                  * Since all locks should be for the same file, lfp should
1354                  * not change.
1355                  */
1356                 if (lfp == NULL)
1357                         lfp = lop->lo_lfp;
1358                 else if (lfp != lop->lo_lfp)
1359                         panic("allnfslocks");
1360                 /*
1361                  * If vp is NULL and cansleep != 0, a vnode must be acquired
1362                  * from the file handle. This only occurs when called from
1363                  * nfsrv_cleanclient().
1364                  */
1365                 if (gottvp == 0) {
1366                         if (nfsrv_dolocallocks == 0)
1367                                 tvp = NULL;
1368                         else if (vp == NULL && cansleep != 0) {
1369                                 tvp = nfsvno_getvp(&lfp->lf_fh);
1370                                 NFSVOPUNLOCK(tvp, 0);
1371                         } else
1372                                 tvp = vp;
1373                         gottvp = 1;
1374                 }
1375
1376                 if (tvp != NULL) {
1377                         if (cansleep == 0)
1378                                 panic("allnfs2");
1379                         first = lop->lo_first;
1380                         end = lop->lo_end;
1381                         nfsrv_freenfslock(lop);
1382                         nfsrv_localunlock(tvp, lfp, first, end, p);
1383                         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1384                             nrlp)
1385                                 free(rlp, M_NFSDROLLBACK);
1386                         LIST_INIT(&lfp->lf_rollback);
1387                 } else
1388                         nfsrv_freenfslock(lop);
1389                 lop = nlop;
1390         }
1391         if (vp == NULL && tvp != NULL)
1392                 vrele(tvp);
1393 }
1394
1395 /*
1396  * Free an nfslock structure.
1397  */
1398 static void
1399 nfsrv_freenfslock(struct nfslock *lop)
1400 {
1401
1402         if (lop->lo_lckfile.le_prev != NULL) {
1403                 LIST_REMOVE(lop, lo_lckfile);
1404                 newnfsstats.srvlocks--;
1405                 nfsrv_openpluslock--;
1406         }
1407         LIST_REMOVE(lop, lo_lckowner);
1408         FREE((caddr_t)lop, M_NFSDLOCK);
1409 }
1410
1411 /*
1412  * This function frees an nfslockfile structure.
1413  */
1414 static void
1415 nfsrv_freenfslockfile(struct nfslockfile *lfp)
1416 {
1417
1418         LIST_REMOVE(lfp, lf_hash);
1419         FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1420 }
1421
1422 /*
1423  * This function looks up an nfsstate structure via stateid.
1424  */
1425 static int
1426 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1427     struct nfsstate **stpp)
1428 {
1429         struct nfsstate *stp;
1430         struct nfsstatehead *hp;
1431         int error = 0;
1432
1433         *stpp = NULL;
1434         hp = NFSSTATEHASH(clp, *stateidp);
1435         LIST_FOREACH(stp, hp, ls_hash) {
1436                 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1437                         NFSX_STATEIDOTHER))
1438                         break;
1439         }
1440
1441         /*
1442          * If no state id in list, return NFSERR_BADSTATEID.
1443          */
1444         if (stp == LIST_END(hp)) {
1445                 error = NFSERR_BADSTATEID;
1446                 goto out;
1447         }
1448         *stpp = stp;
1449
1450 out:
1451         NFSEXITCODE(error);
1452         return (error);
1453 }
1454
1455 /*
1456  * This function gets an nfsstate structure via owner string.
1457  */
1458 static void
1459 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1460     struct nfsstate **stpp)
1461 {
1462         struct nfsstate *stp;
1463
1464         *stpp = NULL;
1465         LIST_FOREACH(stp, hp, ls_list) {
1466                 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1467                   !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1468                         *stpp = stp;
1469                         return;
1470                 }
1471         }
1472 }
1473
1474 /*
1475  * Lock control function called to update lock status.
1476  * Returns 0 upon success, -1 if there is no lock and the flags indicate
1477  * that one isn't to be created and an NFSERR_xxx for other errors.
1478  * The structures new_stp and new_lop are passed in as pointers that should
1479  * be set to NULL if the structure is used and shouldn't be free'd.
1480  * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1481  * never used and can safely be allocated on the stack. For all other
1482  * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1483  * in case they are used.
1484  */
1485 APPLESTATIC int
1486 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1487     struct nfslock **new_lopp, struct nfslockconflict *cfp,
1488     nfsquad_t clientid, nfsv4stateid_t *stateidp,
1489     __unused struct nfsexstuff *exp,
1490     struct nfsrv_descript *nd, NFSPROC_T *p)
1491 {
1492         struct nfslock *lop;
1493         struct nfsstate *new_stp = *new_stpp;
1494         struct nfslock *new_lop = *new_lopp;
1495         struct nfsstate *tstp, *mystp, *nstp;
1496         int specialid = 0;
1497         struct nfslockfile *lfp;
1498         struct nfslock *other_lop = NULL;
1499         struct nfsstate *stp, *lckstp = NULL;
1500         struct nfsclient *clp = NULL;
1501         u_int32_t bits;
1502         int error = 0, haslock = 0, ret, reterr;
1503         int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1504         fhandle_t nfh;
1505         uint64_t first, end;
1506         uint32_t lock_flags;
1507
1508         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1509                 /*
1510                  * Note the special cases of "all 1s" or "all 0s" stateids and
1511                  * let reads with all 1s go ahead.
1512                  */
1513                 if (new_stp->ls_stateid.seqid == 0x0 &&
1514                     new_stp->ls_stateid.other[0] == 0x0 &&
1515                     new_stp->ls_stateid.other[1] == 0x0 &&
1516                     new_stp->ls_stateid.other[2] == 0x0)
1517                         specialid = 1;
1518                 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1519                     new_stp->ls_stateid.other[0] == 0xffffffff &&
1520                     new_stp->ls_stateid.other[1] == 0xffffffff &&
1521                     new_stp->ls_stateid.other[2] == 0xffffffff)
1522                         specialid = 2;
1523         }
1524
1525         /*
1526          * Check for restart conditions (client and server).
1527          */
1528         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1529             &new_stp->ls_stateid, specialid);
1530         if (error)
1531                 goto out;
1532
1533         /*
1534          * Check for state resource limit exceeded.
1535          */
1536         if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1537             nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
1538                 error = NFSERR_RESOURCE;
1539                 goto out;
1540         }
1541
1542         /*
1543          * For the lock case, get another nfslock structure,
1544          * just in case we need it.
1545          * Malloc now, before we start sifting through the linked lists,
1546          * in case we have to wait for memory.
1547          */
1548 tryagain:
1549         if (new_stp->ls_flags & NFSLCK_LOCK)
1550                 MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1551                     M_NFSDLOCK, M_WAITOK);
1552         filestruct_locked = 0;
1553         reterr = 0;
1554         lfp = NULL;
1555
1556         /*
1557          * Get the lockfile structure for CFH now, so we can do a sanity
1558          * check against the stateid, before incrementing the seqid#, since
1559          * we want to return NFSERR_BADSTATEID on failure and the seqid#
1560          * shouldn't be incremented for this case.
1561          * If nfsrv_getlockfile() returns -1, it means "not found", which
1562          * will be handled later.
1563          * If we are doing Lock/LockU and local locking is enabled, sleep
1564          * lock the nfslockfile structure.
1565          */
1566         getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1567         NFSLOCKSTATE();
1568         if (getlckret == 0) {
1569                 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1570                     nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1571                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1572                             &lfp, &nfh, 1);
1573                         if (getlckret == 0)
1574                                 filestruct_locked = 1;
1575                 } else
1576                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1577                             &lfp, &nfh, 0);
1578         }
1579         if (getlckret != 0 && getlckret != -1)
1580                 reterr = getlckret;
1581
1582         if (filestruct_locked != 0) {
1583                 LIST_INIT(&lfp->lf_rollback);
1584                 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1585                         /*
1586                          * For local locking, do the advisory locking now, so
1587                          * that any conflict can be detected. A failure later
1588                          * can be rolled back locally. If an error is returned,
1589                          * struct nfslockfile has been unlocked and any local
1590                          * locking rolled back.
1591                          */
1592                         NFSUNLOCKSTATE();
1593                         if (vnode_unlocked == 0) {
1594                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1595                                 vnode_unlocked = 1;
1596                                 NFSVOPUNLOCK(vp, 0);
1597                         }
1598                         reterr = nfsrv_locallock(vp, lfp,
1599                             (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1600                             new_lop->lo_first, new_lop->lo_end, cfp, p);
1601                         NFSLOCKSTATE();
1602                 }
1603         }
1604
1605         if (specialid == 0) {
1606             if (new_stp->ls_flags & NFSLCK_TEST) {
1607                 /*
1608                  * RFC 3530 does not list LockT as an op that renews a
1609                  * lease, but the consensus seems to be that it is ok
1610                  * for a server to do so.
1611                  */
1612                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1613                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
1614
1615                 /*
1616                  * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1617                  * error returns for LockT, just go ahead and test for a lock,
1618                  * since there are no locks for this client, but other locks
1619                  * can conflict. (ie. same client will always be false)
1620                  */
1621                 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1622                     error = 0;
1623                 lckstp = new_stp;
1624             } else {
1625               error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1626                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1627               if (error == 0)
1628                 /*
1629                  * Look up the stateid
1630                  */
1631                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1632                   new_stp->ls_flags, &stp);
1633               /*
1634                * do some sanity checks for an unconfirmed open or a
1635                * stateid that refers to the wrong file, for an open stateid
1636                */
1637               if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1638                   ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1639                    (getlckret == 0 && stp->ls_lfp != lfp)))
1640                         error = NFSERR_BADSTATEID;
1641               if (error == 0 &&
1642                   (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1643                   getlckret == 0 && stp->ls_lfp != lfp)
1644                         error = NFSERR_BADSTATEID;
1645
1646               /*
1647                * If the lockowner stateid doesn't refer to the same file,
1648                * I believe that is considered ok, since some clients will
1649                * only create a single lockowner and use that for all locks
1650                * on all files.
1651                * For now, log it as a diagnostic, instead of considering it
1652                * a BadStateid.
1653                */
1654               if (error == 0 && (stp->ls_flags &
1655                   (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1656                   getlckret == 0 && stp->ls_lfp != lfp) {
1657 #ifdef DIAGNOSTIC
1658                   printf("Got a lock statid for different file open\n");
1659 #endif
1660                   /*
1661                   error = NFSERR_BADSTATEID;
1662                   */
1663               }
1664
1665               if (error == 0) {
1666                     if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1667                         /*
1668                          * If haslock set, we've already checked the seqid.
1669                          */
1670                         if (!haslock) {
1671                             if (stp->ls_flags & NFSLCK_OPEN)
1672                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1673                                     stp->ls_openowner, new_stp->ls_op);
1674                             else
1675                                 error = NFSERR_BADSTATEID;
1676                         }
1677                         if (!error)
1678                             nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1679                         if (lckstp)
1680                             /*
1681                              * I believe this should be an error, but it
1682                              * isn't obvious what NFSERR_xxx would be
1683                              * appropriate, so I'll use NFSERR_INVAL for now.
1684                              */
1685                             error = NFSERR_INVAL;
1686                         else
1687                             lckstp = new_stp;
1688                     } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1689                         /*
1690                          * If haslock set, ditto above.
1691                          */
1692                         if (!haslock) {
1693                             if (stp->ls_flags & NFSLCK_OPEN)
1694                                 error = NFSERR_BADSTATEID;
1695                             else
1696                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1697                                     stp, new_stp->ls_op);
1698                         }
1699                         lckstp = stp;
1700                     } else {
1701                         lckstp = stp;
1702                     }
1703               }
1704               /*
1705                * If the seqid part of the stateid isn't the same, return
1706                * NFSERR_OLDSTATEID for cases other than I/O Ops.
1707                * For I/O Ops, only return NFSERR_OLDSTATEID if
1708                * nfsrv_returnoldstateid is set. (The consensus on the email
1709                * list was that most clients would prefer to not receive
1710                * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1711                * is what will happen, so I use the nfsrv_returnoldstateid to
1712                * allow for either server configuration.)
1713                */
1714               if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1715                   (((nd->nd_flag & ND_NFSV41) == 0 &&
1716                    (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1717                     nfsrv_returnoldstateid)) ||
1718                    ((nd->nd_flag & ND_NFSV41) != 0 &&
1719                     new_stp->ls_stateid.seqid != 0)))
1720                     error = NFSERR_OLDSTATEID;
1721             }
1722         }
1723
1724         /*
1725          * Now we can check for grace.
1726          */
1727         if (!error)
1728                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1729         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1730                 nfsrv_checkstable(clp))
1731                 error = NFSERR_NOGRACE;
1732         /*
1733          * If we successfully Reclaimed state, note that.
1734          */
1735         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1736                 nfsrv_markstable(clp);
1737
1738         /*
1739          * At this point, either error == NFSERR_BADSTATEID or the
1740          * seqid# has been updated, so we can return any error.
1741          * If error == 0, there may be an error in:
1742          *    nd_repstat - Set by the calling function.
1743          *    reterr - Set above, if getting the nfslockfile structure
1744          *       or acquiring the local lock failed.
1745          *    (If both of these are set, nd_repstat should probably be
1746          *     returned, since that error was detected before this
1747          *     function call.)
1748          */
1749         if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1750                 if (error == 0) {
1751                         if (nd->nd_repstat != 0)
1752                                 error = nd->nd_repstat;
1753                         else
1754                                 error = reterr;
1755                 }
1756                 if (filestruct_locked != 0) {
1757                         /* Roll back local locks. */
1758                         NFSUNLOCKSTATE();
1759                         if (vnode_unlocked == 0) {
1760                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1761                                 vnode_unlocked = 1;
1762                                 NFSVOPUNLOCK(vp, 0);
1763                         }
1764                         nfsrv_locallock_rollback(vp, lfp, p);
1765                         NFSLOCKSTATE();
1766                         nfsrv_unlocklf(lfp);
1767                 }
1768                 NFSUNLOCKSTATE();
1769                 goto out;
1770         }
1771
1772         /*
1773          * Check the nfsrv_getlockfile return.
1774          * Returned -1 if no structure found.
1775          */
1776         if (getlckret == -1) {
1777                 error = NFSERR_EXPIRED;
1778                 /*
1779                  * Called from lockt, so no lock is OK.
1780                  */
1781                 if (new_stp->ls_flags & NFSLCK_TEST) {
1782                         error = 0;
1783                 } else if (new_stp->ls_flags &
1784                     (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1785                         /*
1786                          * Called to check for a lock, OK if the stateid is all
1787                          * 1s or all 0s, but there should be an nfsstate
1788                          * otherwise.
1789                          * (ie. If there is no open, I'll assume no share
1790                          *  deny bits.)
1791                          */
1792                         if (specialid)
1793                                 error = 0;
1794                         else
1795                                 error = NFSERR_BADSTATEID;
1796                 }
1797                 NFSUNLOCKSTATE();
1798                 goto out;
1799         }
1800
1801         /*
1802          * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1803          * For NFSLCK_CHECK, allow a read if write access is granted,
1804          * but check for a deny. For NFSLCK_LOCK, require correct access,
1805          * which implies a conflicting deny can't exist.
1806          */
1807         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1808             /*
1809              * Four kinds of state id:
1810              * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1811              * - stateid for an open
1812              * - stateid for a delegation
1813              * - stateid for a lock owner
1814              */
1815             if (!specialid) {
1816                 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1817                     delegation = 1;
1818                     mystp = stp;
1819                     nfsrv_delaydelegtimeout(stp);
1820                 } else if (stp->ls_flags & NFSLCK_OPEN) {
1821                     mystp = stp;
1822                 } else {
1823                     mystp = stp->ls_openstp;
1824                 }
1825                 /*
1826                  * If locking or checking, require correct access
1827                  * bit set.
1828                  */
1829                 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1830                      !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1831                        mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1832                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1833                       (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1834                      !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1835                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1836                       (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1837                      !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1838                         if (filestruct_locked != 0) {
1839                                 /* Roll back local locks. */
1840                                 NFSUNLOCKSTATE();
1841                                 if (vnode_unlocked == 0) {
1842                                         ASSERT_VOP_ELOCKED(vp,
1843                                             "nfsrv_lockctrl3");
1844                                         vnode_unlocked = 1;
1845                                         NFSVOPUNLOCK(vp, 0);
1846                                 }
1847                                 nfsrv_locallock_rollback(vp, lfp, p);
1848                                 NFSLOCKSTATE();
1849                                 nfsrv_unlocklf(lfp);
1850                         }
1851                         NFSUNLOCKSTATE();
1852                         error = NFSERR_OPENMODE;
1853                         goto out;
1854                 }
1855             } else
1856                 mystp = NULL;
1857             if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1858                 /*
1859                  * Check for a conflicting deny bit.
1860                  */
1861                 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1862                     if (tstp != mystp) {
1863                         bits = tstp->ls_flags;
1864                         bits >>= NFSLCK_SHIFT;
1865                         if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1866                             KASSERT(vnode_unlocked == 0,
1867                                 ("nfsrv_lockctrl: vnode unlocked1"));
1868                             ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1869                                 vp, p);
1870                             if (ret == 1) {
1871                                 /*
1872                                 * nfsrv_clientconflict unlocks state
1873                                  * when it returns non-zero.
1874                                  */
1875                                 lckstp = NULL;
1876                                 goto tryagain;
1877                             }
1878                             if (ret == 0)
1879                                 NFSUNLOCKSTATE();
1880                             if (ret == 2)
1881                                 error = NFSERR_PERM;
1882                             else
1883                                 error = NFSERR_OPENMODE;
1884                             goto out;
1885                         }
1886                     }
1887                 }
1888
1889                 /* We're outta here */
1890                 NFSUNLOCKSTATE();
1891                 goto out;
1892             }
1893         }
1894
1895         /*
1896          * For setattr, just get rid of all the Delegations for other clients.
1897          */
1898         if (new_stp->ls_flags & NFSLCK_SETATTR) {
1899                 KASSERT(vnode_unlocked == 0,
1900                     ("nfsrv_lockctrl: vnode unlocked2"));
1901                 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1902                 if (ret) {
1903                         /*
1904                          * nfsrv_cleandeleg() unlocks state when it
1905                          * returns non-zero.
1906                          */
1907                         if (ret == -1) {
1908                                 lckstp = NULL;
1909                                 goto tryagain;
1910                         }
1911                         error = ret;
1912                         goto out;
1913                 }
1914                 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1915                     (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1916                      LIST_EMPTY(&lfp->lf_deleg))) {
1917                         NFSUNLOCKSTATE();
1918                         goto out;
1919                 }
1920         }
1921
1922         /*
1923          * Check for a conflicting delegation. If one is found, call
1924          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1925          * been set yet, it will get the lock. Otherwise, it will recall
1926          * the delegation. Then, we try try again...
1927          * I currently believe the conflict algorithm to be:
1928          * For Lock Ops (Lock/LockT/LockU)
1929          * - there is a conflict iff a different client has a write delegation
1930          * For Reading (Read Op)
1931          * - there is a conflict iff a different client has a write delegation
1932          *   (the specialids are always a different client)
1933          * For Writing (Write/Setattr of size)
1934          * - there is a conflict if a different client has any delegation
1935          * - there is a conflict if the same client has a read delegation
1936          *   (I don't understand why this isn't allowed, but that seems to be
1937          *    the current consensus?)
1938          */
1939         tstp = LIST_FIRST(&lfp->lf_deleg);
1940         while (tstp != LIST_END(&lfp->lf_deleg)) {
1941             nstp = LIST_NEXT(tstp, ls_file);
1942             if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1943                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
1944                   (new_lop->lo_flags & NFSLCK_READ))) &&
1945                   clp != tstp->ls_clp &&
1946                  (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1947                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
1948                    (new_lop->lo_flags & NFSLCK_WRITE) &&
1949                   (clp != tstp->ls_clp ||
1950                    (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1951                 ret = 0;
1952                 if (filestruct_locked != 0) {
1953                         /* Roll back local locks. */
1954                         NFSUNLOCKSTATE();
1955                         if (vnode_unlocked == 0) {
1956                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
1957                                 NFSVOPUNLOCK(vp, 0);
1958                         }
1959                         nfsrv_locallock_rollback(vp, lfp, p);
1960                         NFSLOCKSTATE();
1961                         nfsrv_unlocklf(lfp);
1962                         NFSUNLOCKSTATE();
1963                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1964                         vnode_unlocked = 0;
1965                         if ((vp->v_iflag & VI_DOOMED) != 0)
1966                                 ret = NFSERR_SERVERFAULT;
1967                         NFSLOCKSTATE();
1968                 }
1969                 if (ret == 0)
1970                         ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
1971                 if (ret) {
1972                     /*
1973                      * nfsrv_delegconflict unlocks state when it
1974                      * returns non-zero, which it always does.
1975                      */
1976                     if (other_lop) {
1977                         FREE((caddr_t)other_lop, M_NFSDLOCK);
1978                         other_lop = NULL;
1979                     }
1980                     if (ret == -1) {
1981                         lckstp = NULL;
1982                         goto tryagain;
1983                     }
1984                     error = ret;
1985                     goto out;
1986                 }
1987                 /* Never gets here. */
1988             }
1989             tstp = nstp;
1990         }
1991
1992         /*
1993          * Handle the unlock case by calling nfsrv_updatelock().
1994          * (Should I have done some access checking above for unlock? For now,
1995          *  just let it happen.)
1996          */
1997         if (new_stp->ls_flags & NFSLCK_UNLOCK) {
1998                 first = new_lop->lo_first;
1999                 end = new_lop->lo_end;
2000                 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2001                 stateidp->seqid = ++(stp->ls_stateid.seqid);
2002                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2003                         stateidp->seqid = stp->ls_stateid.seqid = 1;
2004                 stateidp->other[0] = stp->ls_stateid.other[0];
2005                 stateidp->other[1] = stp->ls_stateid.other[1];
2006                 stateidp->other[2] = stp->ls_stateid.other[2];
2007                 if (filestruct_locked != 0) {
2008                         NFSUNLOCKSTATE();
2009                         if (vnode_unlocked == 0) {
2010                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2011                                 vnode_unlocked = 1;
2012                                 NFSVOPUNLOCK(vp, 0);
2013                         }
2014                         /* Update the local locks. */
2015                         nfsrv_localunlock(vp, lfp, first, end, p);
2016                         NFSLOCKSTATE();
2017                         nfsrv_unlocklf(lfp);
2018                 }
2019                 NFSUNLOCKSTATE();
2020                 goto out;
2021         }
2022
2023         /*
2024          * Search for a conflicting lock. A lock conflicts if:
2025          * - the lock range overlaps and
2026          * - at least one lock is a write lock and
2027          * - it is not owned by the same lock owner
2028          */
2029         if (!delegation) {
2030           LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2031             if (new_lop->lo_end > lop->lo_first &&
2032                 new_lop->lo_first < lop->lo_end &&
2033                 (new_lop->lo_flags == NFSLCK_WRITE ||
2034                  lop->lo_flags == NFSLCK_WRITE) &&
2035                 lckstp != lop->lo_stp &&
2036                 (clp != lop->lo_stp->ls_clp ||
2037                  lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2038                  NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2039                     lckstp->ls_ownerlen))) {
2040                 if (other_lop) {
2041                     FREE((caddr_t)other_lop, M_NFSDLOCK);
2042                     other_lop = NULL;
2043                 }
2044                 if (vnode_unlocked != 0)
2045                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2046                         NULL, p);
2047                 else
2048                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2049                         vp, p);
2050                 if (ret == 1) {
2051                     if (filestruct_locked != 0) {
2052                         if (vnode_unlocked == 0) {
2053                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2054                                 NFSVOPUNLOCK(vp, 0);
2055                         }
2056                         /* Roll back local locks. */
2057                         nfsrv_locallock_rollback(vp, lfp, p);
2058                         NFSLOCKSTATE();
2059                         nfsrv_unlocklf(lfp);
2060                         NFSUNLOCKSTATE();
2061                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2062                         vnode_unlocked = 0;
2063                         if ((vp->v_iflag & VI_DOOMED) != 0) {
2064                                 error = NFSERR_SERVERFAULT;
2065                                 goto out;
2066                         }
2067                     }
2068                     /*
2069                      * nfsrv_clientconflict() unlocks state when it
2070                      * returns non-zero.
2071                      */
2072                     lckstp = NULL;
2073                     goto tryagain;
2074                 }
2075                 /*
2076                  * Found a conflicting lock, so record the conflict and
2077                  * return the error.
2078                  */
2079                 if (cfp != NULL && ret == 0) {
2080                     cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2081                     cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2082                     cfp->cl_first = lop->lo_first;
2083                     cfp->cl_end = lop->lo_end;
2084                     cfp->cl_flags = lop->lo_flags;
2085                     cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2086                     NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2087                         cfp->cl_ownerlen);
2088                 }
2089                 if (ret == 2)
2090                     error = NFSERR_PERM;
2091                 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2092                     error = NFSERR_RECLAIMCONFLICT;
2093                 else if (new_stp->ls_flags & NFSLCK_CHECK)
2094                     error = NFSERR_LOCKED;
2095                 else
2096                     error = NFSERR_DENIED;
2097                 if (filestruct_locked != 0 && ret == 0) {
2098                         /* Roll back local locks. */
2099                         NFSUNLOCKSTATE();
2100                         if (vnode_unlocked == 0) {
2101                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2102                                 vnode_unlocked = 1;
2103                                 NFSVOPUNLOCK(vp, 0);
2104                         }
2105                         nfsrv_locallock_rollback(vp, lfp, p);
2106                         NFSLOCKSTATE();
2107                         nfsrv_unlocklf(lfp);
2108                 }
2109                 if (ret == 0)
2110                         NFSUNLOCKSTATE();
2111                 goto out;
2112             }
2113           }
2114         }
2115
2116         /*
2117          * We only get here if there was no lock that conflicted.
2118          */
2119         if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2120                 NFSUNLOCKSTATE();
2121                 goto out;
2122         }
2123
2124         /*
2125          * We only get here when we are creating or modifying a lock.
2126          * There are two variants:
2127          * - exist_lock_owner where lock_owner exists
2128          * - open_to_lock_owner with new lock_owner
2129          */
2130         first = new_lop->lo_first;
2131         end = new_lop->lo_end;
2132         lock_flags = new_lop->lo_flags;
2133         if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2134                 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2135                 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2136                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2137                         stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2138                 stateidp->other[0] = lckstp->ls_stateid.other[0];
2139                 stateidp->other[1] = lckstp->ls_stateid.other[1];
2140                 stateidp->other[2] = lckstp->ls_stateid.other[2];
2141         } else {
2142                 /*
2143                  * The new open_to_lock_owner case.
2144                  * Link the new nfsstate into the lists.
2145                  */
2146                 new_stp->ls_seq = new_stp->ls_opentolockseq;
2147                 nfsrvd_refcache(new_stp->ls_op);
2148                 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2149                 stateidp->other[0] = new_stp->ls_stateid.other[0] =
2150                     clp->lc_clientid.lval[0];
2151                 stateidp->other[1] = new_stp->ls_stateid.other[1] =
2152                     clp->lc_clientid.lval[1];
2153                 stateidp->other[2] = new_stp->ls_stateid.other[2] =
2154                     nfsrv_nextstateindex(clp);
2155                 new_stp->ls_clp = clp;
2156                 LIST_INIT(&new_stp->ls_lock);
2157                 new_stp->ls_openstp = stp;
2158                 new_stp->ls_lfp = lfp;
2159                 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2160                     lfp);
2161                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2162                     new_stp, ls_hash);
2163                 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2164                 *new_lopp = NULL;
2165                 *new_stpp = NULL;
2166                 newnfsstats.srvlockowners++;
2167                 nfsrv_openpluslock++;
2168         }
2169         if (filestruct_locked != 0) {
2170                 NFSUNLOCKSTATE();
2171                 nfsrv_locallock_commit(lfp, lock_flags, first, end);
2172                 NFSLOCKSTATE();
2173                 nfsrv_unlocklf(lfp);
2174         }
2175         NFSUNLOCKSTATE();
2176
2177 out:
2178         if (haslock) {
2179                 NFSLOCKV4ROOTMUTEX();
2180                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2181                 NFSUNLOCKV4ROOTMUTEX();
2182         }
2183         if (vnode_unlocked != 0) {
2184                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2185                 if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2186                         error = NFSERR_SERVERFAULT;
2187         }
2188         if (other_lop)
2189                 FREE((caddr_t)other_lop, M_NFSDLOCK);
2190         NFSEXITCODE2(error, nd);
2191         return (error);
2192 }
2193
2194 /*
2195  * Check for state errors for Open.
2196  * repstat is passed back out as an error if more critical errors
2197  * are not detected.
      *
      * Arguments, as used by the code below:
      *   clientid - identifies the client; handed to nfsrv_checkrestart()
      *              and nfsrv_getclient().
      *   stateidp - only examined when NFSLCK_DELEGCUR is set in
      *              new_stp->ls_flags, where it is matched against the
      *              stateids of the delegations held on the file.
      *   new_stp  - describes the requested open; ls_flags, ls_stateid,
      *              ls_seq and ls_op are read here.
      *   vp       - vnode of the file being opened, or NULL when the file
      *              does not exist yet. With NULL only the restart, limit,
      *              client, open owner seqid and grace checks are done.
      *   nd       - request descriptor; nd_flag is tested for ND_NFSV41 and
      *              nd is passed through to the helpers.
      *   p        - passed through to the helpers.
      *   repstat  - status to return when nothing more critical is found.
      *
      * Returns 0 when no problem was detected, otherwise an error. The
      * values assigned directly in this function are NFSERR_RESOURCE,
      * NFSERR_NOGRACE, NFSERR_EXPIRED, NFSERR_PERM, NFSERR_RECLAIMCONFLICT,
      * NFSERR_SHAREDENIED and repstat; anything else comes from a helper.
      *
      * Locking: the state lock is taken with NFSLOCKSTATE() after the
      * nfslockfile allocation and is dropped again before returning, either
      * here or by a conflict helper (see the comments at the call sites).
      * haslock is handed by reference to nfsrv_clientconflict() and
      * nfsrv_delegconflict(); whenever it is non-zero on a return path that
      * tests it, nfsv4rootfs_lock is released via nfsv4_unlock().
2198  */
2199 APPLESTATIC int
2200 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2201     struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2202     NFSPROC_T *p, int repstat)
2203 {
2204         struct nfsstate *stp, *nstp;
2205         struct nfsclient *clp;
2206         struct nfsstate *ownerstp;
2207         struct nfslockfile *lfp, *new_lfp;
2208         int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2209
             /*
              * readonly is set when the requested share bits are exactly
              * NFSLCK_READACCESS. It is only consulted by the delegation
              * conflict scan near the end of the function.
              */
2210         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2211                 readonly = 1;
2212         /*
2213          * Check for restart conditions (client and server).
2214          */
2215         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2216                 &new_stp->ls_stateid, 0);
2217         if (error)
2218                 goto out;
2219
2220         /*
2221          * Check for state resource limit exceeded.
2222          * Technically this should be SMP protected, but the worst
2223          * case error is "out by one or two" on the count when it
2224          * returns NFSERR_RESOURCE and the limit is just a rather
2225          * arbitrary high water mark, so no harm is done.
2226          */
2227         if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
2228                 error = NFSERR_RESOURCE;
2229                 goto out;
2230         }
2231
     /*
      * Restart point. The conflict checks below jump back here when a helper
      * returned with the state lock already dropped. A fresh nfslockfile is
      * allocated on every pass, before NFSLOCKSTATE() is taken; the one from
      * the previous pass has been freed (or is no longer referenced through
      * new_lfp) by the time any "goto tryagain" can be reached.
      */
2232 tryagain:
2233         MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2234             M_NFSDLOCKFILE, M_WAITOK);
2235         if (vp)
2236                 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2237                     NULL, p);
2238         NFSLOCKSTATE();
2239         /*
2240          * Get the nfsclient structure.
2241          */
2242         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2243             (nfsquad_t)((u_quad_t)0), 0, nd, p);
2244
2245         /*
2246          * Look up the open owner. See if it needs confirmation and
2247          * check the seq#, as required.
              * (ownerstp is only assigned when error == 0, which is why
              *  every later use of it is guarded by !error.)
2248          */
2249         if (!error)
2250                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2251
2252         if (!error && ownerstp) {
2253                 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2254                     new_stp->ls_op);
2255                 /*
2256                  * If the OpenOwner hasn't been confirmed, assume the
2257                  * old one was a replay and this one is ok.
2258                  * See: RFC3530 Sec. 14.2.18.
2259                  */
2260                 if (error == NFSERR_BADSEQID &&
2261                     (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2262                         error = 0;
2263         }
2264
2265         /*
2266          * Check for grace.
              * A reclaim (NFSLCK_RECLAIM) that passed the grace check is
              * still refused with NFSERR_NOGRACE when nfsrv_checkstable()
              * returns non-zero for this client.
2267          */
2268         if (!error)
2269                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2270         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2271                 nfsrv_checkstable(clp))
2272                 error = NFSERR_NOGRACE;
2273
2274         /*
2275          * If none of the above errors occurred, let repstat be
2276          * returned.
2277          */
2278         if (repstat && !error)
2279                 error = repstat;
2280         if (error) {
2281                 NFSUNLOCKSTATE();
2282                 if (haslock) {
2283                         NFSLOCKV4ROOTMUTEX();
2284                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2285                         NFSUNLOCKV4ROOTMUTEX();
2286                 }
2287                 free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2288                 goto out;
2289         }
2290
2291         /*
2292          * If vp == NULL, the file doesn't exist yet, so return ok.
2293          * (This always happens on the first pass, so haslock must be 0.)
2294          */
2295         if (vp == NULL) {
2296                 NFSUNLOCKSTATE();
2297                 FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2298                 goto out;
2299         }
2300
2301         /*
2302          * Get the structure for the underlying file.
              * A failure recorded earlier by nfsrv_getlockfh() (getfhret)
              * takes precedence and nfsrv_getlockfile() is not called then.
2303          */
2304         if (getfhret)
2305                 error = getfhret;
2306         else
2307                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2308                     NULL, 0);
             /*
              * new_lfp is passed by reference above; whatever is still
              * referenced through it afterwards is released here.
              * NOTE(review): presumably nfsrv_getlockfile() sets new_lfp to
              * NULL when it keeps the structure - confirm against the helper.
              */
2309         if (new_lfp)
2310                 FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2311         if (error) {
2312                 NFSUNLOCKSTATE();
2313                 if (haslock) {
2314                         NFSLOCKV4ROOTMUTEX();
2315                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2316                         NFSUNLOCKV4ROOTMUTEX();
2317                 }
2318                 goto out;
2319         }
2320
2321         /*
2322          * Search for a conflicting open/share.
2323          */
2324         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2325             /*
2326              * For Delegate_Cur, search for the matching Delegation,
2327              * which indicates no conflict.
2328              * An old delegation should have been recovered by the
2329              * client doing a Claim_DELEGATE_Prev, so I won't let
2330              * it match and return NFSERR_EXPIRED. Should I let it
2331              * match?
                  * For a request with ND_NFSV41 set, a seqid of 0 in
                  * stateidp matches any seqid; otherwise the seqid must be
                  * equal. The "other" part must always compare equal.
2332              */
2333             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2334                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2335                     (((nd->nd_flag & ND_NFSV41) != 0 &&
2336                     stateidp->seqid == 0) ||
2337                     stateidp->seqid == stp->ls_stateid.seqid) &&
2338                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2339                           NFSX_STATEIDOTHER))
2340                         break;
2341             }
                 /*
                  * Fail with NFSERR_EXPIRED when no delegation matched, or
                  * when write access is requested but the matching
                  * delegation is a read delegation.
                  */
2342             if (stp == LIST_END(&lfp->lf_deleg) ||
2343                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2344                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
2345                 NFSUNLOCKSTATE();
2346                 if (haslock) {
2347                         NFSLOCKV4ROOTMUTEX();
2348                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2349                         NFSUNLOCKV4ROOTMUTEX();
2350                 }
2351                 error = NFSERR_EXPIRED;
2352                 goto out;
2353             }
2354         }
2355
2356         /*
2357          * Check for access/deny bit conflicts. I check for the same
2358          * owner as well, in case the client didn't bother.
              * The test is symmetric: the new open's access bits are
              * checked against the existing open's flags shifted right by
              * NFSLCK_SHIFT, and the existing open's access bits against
              * the new open's shifted flags.
              * NOTE(review): presumably the deny bits sit NFSLCK_SHIFT
              * above the matching access bits - confirm in the header.
              * The whole check is skipped when NFSLCK_DELEGCUR is set.
2359          */
2360         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2361                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2362                     (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2363                       ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2364                      ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2365                       ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2366                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2367                         if (ret == 1) {
2368                                 /*
2369                                  * nfsrv_clientconflict() unlocks
2370                                  * state when it returns non-zero.
2371                                  */
2372                                 goto tryagain;
2373                         }
2374                         if (ret == 2)
2375                                 error = NFSERR_PERM;
2376                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2377                                 error = NFSERR_RECLAIMCONFLICT;
2378                         else
2379                                 error = NFSERR_SHAREDENIED;
                             /*
                              * Only a zero return leaves the state lock
                              * held for us to drop; for ret == 2 the
                              * helper has already unlocked it (see the
                              * comment above).
                              */
2380                         if (ret == 0)
2381                                 NFSUNLOCKSTATE();
2382                         if (haslock) {
2383                                 NFSLOCKV4ROOTMUTEX();
2384                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2385                                 NFSUNLOCKV4ROOTMUTEX();
2386                         }
2387                         goto out;
2388                 }
2389         }
2390
2391         /*
2392          * Check for a conflicting delegation. If one is found, call
2393          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2394          * been set yet, it will get the lock. Otherwise, it will recall
2395          * the delegation. Then, we try try again...
2396          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2397          *  isn't a conflict.)
2398          * I currently believe the conflict algorithm to be:
2399          * For Open with Read Access and Deny None
2400          * - there is a conflict iff a different client has a write delegation
2401          * For Open with other Write Access or any Deny except None
2402          * - there is a conflict if a different client has any delegation
2403          * - there is a conflict if the same client has a read delegation
2404          *   (The current consensus is that this last case should be
2405          *    considered a conflict since the client with a read delegation
2406          *    could have done an Open with ReadAccess and WriteDeny
2407          *    locally and then not have checked for the WriteDeny.)
2408          * Don't check for a Reclaim, since that will be dealt with
2409          * by nfsrv_openctrl().
2410          */
2411         if (!(new_stp->ls_flags &
2412                 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2413             stp = LIST_FIRST(&lfp->lf_deleg);
2414             while (stp != LIST_END(&lfp->lf_deleg)) {
                     /*
                      * The successor is fetched before the helper is called
                      * and the walk continues from it when ret == 0.
                      * NOTE(review): presumably this is so the walk survives
                      * stp being unlinked by nfsrv_delegconflict() - confirm.
                      */
2415                 nstp = LIST_NEXT(stp, ls_file);
2416                 if ((readonly && stp->ls_clp != clp &&
2417                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2418                     (!readonly && (stp->ls_clp != clp ||
2419                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2420                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2421                         if (ret) {
2422                             /*
2423                              * nfsrv_delegconflict() unlocks state
2424                              * when it returns non-zero.
                                  * A return of -1 means retry from the top;
                                  * any other non-zero value is returned to
                                  * the caller as the error.
                                  * NOTE(review): unlike the other error
                                  * paths, haslock is not tested before
                                  * "goto out" here; presumably the helper
                                  * has released the v4root lock itself when
                                  * it returns an error - confirm.
2425                              */
2426                             if (ret == -1)
2427                                 goto tryagain;
2428                             error = ret;
2429                             goto out;
2430                         }
2431                 }
2432                 stp = nstp;
2433             }
2434         }
             /* No conflict found: drop the state lock and any v4root lock. */
2435         NFSUNLOCKSTATE();
2436         if (haslock) {
2437                 NFSLOCKV4ROOTMUTEX();
2438                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2439                 NFSUNLOCKV4ROOTMUTEX();
2440         }
2441
2442 out:
2443         NFSEXITCODE2(error, nd);
2444         return (error);
2445 }
2446
2447 /*
2448  * Open control function to create/update open state for an open.
2449  */
2450 APPLESTATIC int
2451 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2452     struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2453     nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2454     NFSPROC_T *p, u_quad_t filerev)
2455 {
2456         struct nfsstate *new_stp = *new_stpp;
2457         struct nfsstate *stp, *nstp;
2458         struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2459         struct nfslockfile *lfp, *new_lfp;
2460         struct nfsclient *clp;
2461         int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2462         int readonly = 0, cbret = 1, getfhret = 0;
2463
2464         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2465                 readonly = 1;
2466         /*
2467          * Check for restart conditions (client and server).
2468          * (Paranoia, should have been detected by nfsrv_opencheck().)
2469          * If an error does show up, return NFSERR_EXPIRED, since the
2470          * seqid# has already been incremented.
2471          */
2472         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2473             &new_stp->ls_stateid, 0);
2474         if (error) {
2475                 printf("Nfsd: openctrl unexpected restart err=%d\n",
2476                     error);
2477                 error = NFSERR_EXPIRED;
2478                 goto out;
2479         }
2480
2481 tryagain:
2482         MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2483             M_NFSDLOCKFILE, M_WAITOK);
2484         MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2485             M_NFSDSTATE, M_WAITOK);
2486         MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2487             M_NFSDSTATE, M_WAITOK);
2488         getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2489             NULL, p);
2490         NFSLOCKSTATE();
2491         /*
2492          * Get the client structure. Since the linked lists could be changed
2493          * by other nfsd processes if this process does a tsleep(), one of
2494          * two things must be done.
2495          * 1 - don't tsleep()
2496          * or
2497          * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2498          *     before using the lists, since this lock stops the other
2499          *     nfsd. This should only be used for rare cases, since it
2500          *     essentially single threads the nfsd.
2501          *     At this time, it is only done for cases where the stable
2502          *     storage file must be written prior to completion of state
2503          *     expiration.
2504          */
2505         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2506             (nfsquad_t)((u_quad_t)0), 0, nd, p);
2507         if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2508             clp->lc_program) {
2509                 /*
2510                  * This happens on the first open for a client
2511                  * that supports callbacks.
2512                  */
2513                 NFSUNLOCKSTATE();
2514                 /*
2515                  * Although nfsrv_docallback() will sleep, clp won't
2516                  * go away, since they are only removed when the
2517                  * nfsv4_lock() has blocked the nfsd threads. The
2518                  * fields in clp can change, but having multiple
2519                  * threads do this Null callback RPC should be
2520                  * harmless.
2521                  */
2522                 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2523                     NULL, 0, NULL, NULL, NULL, p);
2524                 NFSLOCKSTATE();
2525                 clp->lc_flags &= ~LCL_NEEDSCBNULL;
2526                 if (!cbret)
2527                         clp->lc_flags |= LCL_CALLBACKSON;
2528         }
2529
2530         /*
2531          * Look up the open owner. See if it needs confirmation and
2532          * check the seq#, as required.
2533          */
2534         if (!error)
2535                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2536
2537         if (error) {
2538                 NFSUNLOCKSTATE();
2539                 printf("Nfsd: openctrl unexpected state err=%d\n",
2540                         error);
2541                 free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2542                 free((caddr_t)new_open, M_NFSDSTATE);
2543                 free((caddr_t)new_deleg, M_NFSDSTATE);
2544                 if (haslock) {
2545                         NFSLOCKV4ROOTMUTEX();
2546                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2547                         NFSUNLOCKV4ROOTMUTEX();
2548                 }
2549                 error = NFSERR_EXPIRED;
2550                 goto out;
2551         }
2552
2553         if (new_stp->ls_flags & NFSLCK_RECLAIM)
2554                 nfsrv_markstable(clp);
2555
2556         /*
2557          * Get the structure for the underlying file.
2558          */
2559         if (getfhret)
2560                 error = getfhret;
2561         else
2562                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2563                     NULL, 0);
2564         if (new_lfp)
2565                 FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2566         if (error) {
2567                 NFSUNLOCKSTATE();
2568                 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2569                     error);
2570                 free((caddr_t)new_open, M_NFSDSTATE);
2571                 free((caddr_t)new_deleg, M_NFSDSTATE);
2572                 if (haslock) {
2573                         NFSLOCKV4ROOTMUTEX();
2574                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2575                         NFSUNLOCKV4ROOTMUTEX();
2576                 }
2577                 goto out;
2578         }
2579
2580         /*
2581          * Search for a conflicting open/share.
2582          */
2583         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2584             /*
2585              * For Delegate_Cur, search for the matching Delegation,
2586              * which indicates no conflict.
2587              * An old delegation should have been recovered by the
2588              * client doing a Claim_DELEGATE_Prev, so I won't let
2589              * it match and return NFSERR_EXPIRED. Should I let it
2590              * match?
2591              */
2592             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2593                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2594                     (((nd->nd_flag & ND_NFSV41) != 0 &&
2595                     stateidp->seqid == 0) ||
2596                     stateidp->seqid == stp->ls_stateid.seqid) &&
2597                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2598                         NFSX_STATEIDOTHER))
2599                         break;
2600             }
2601             if (stp == LIST_END(&lfp->lf_deleg) ||
2602                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2603                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
2604                 NFSUNLOCKSTATE();
2605                 printf("Nfsd openctrl unexpected expiry\n");
2606                 free((caddr_t)new_open, M_NFSDSTATE);
2607                 free((caddr_t)new_deleg, M_NFSDSTATE);
2608                 if (haslock) {
2609                         NFSLOCKV4ROOTMUTEX();
2610                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
2611                         NFSUNLOCKV4ROOTMUTEX();
2612                 }
2613                 error = NFSERR_EXPIRED;
2614                 goto out;
2615             }
2616
2617             /*
2618              * Don't issue a Delegation, since one already exists and
2619              * delay delegation timeout, as required.
2620              */
2621             delegate = 0;
2622             nfsrv_delaydelegtimeout(stp);
2623         }
2624
2625         /*
2626          * Check for access/deny bit conflicts. I also check for the
2627          * same owner, since the client might not have bothered to check.
2628          * Also, note an open for the same file and owner, if found,
2629          * which is all we do here for Delegate_Cur, since conflict
2630          * checking is already done.
2631          */
2632         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2633                 if (ownerstp && stp->ls_openowner == ownerstp)
2634                         openstp = stp;
2635                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2636                     /*
2637                      * If another client has the file open, the only
2638                      * delegation that can be issued is a Read delegation
2639                      * and only if it is a Read open with Deny none.
2640                      */
2641                     if (clp != stp->ls_clp) {
2642                         if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2643                             NFSLCK_READACCESS)
2644                             writedeleg = 0;
2645                         else
2646                             delegate = 0;
2647                     }
2648                     if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2649                         ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2650                        ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2651                         ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2652                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2653                         if (ret == 1) {
2654                                 /*
2655                                  * nfsrv_clientconflict() unlocks state
2656                                  * when it returns non-zero.
2657                                  */
2658                                 free((caddr_t)new_open, M_NFSDSTATE);
2659                                 free((caddr_t)new_deleg, M_NFSDSTATE);
2660                                 openstp = NULL;
2661                                 goto tryagain;
2662                         }
2663                         if (ret == 2)
2664                                 error = NFSERR_PERM;
2665                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2666                                 error = NFSERR_RECLAIMCONFLICT;
2667                         else
2668                                 error = NFSERR_SHAREDENIED;
2669                         if (ret == 0)
2670                                 NFSUNLOCKSTATE();
2671                         if (haslock) {
2672                                 NFSLOCKV4ROOTMUTEX();
2673                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2674                                 NFSUNLOCKV4ROOTMUTEX();
2675                         }
2676                         free((caddr_t)new_open, M_NFSDSTATE);
2677                         free((caddr_t)new_deleg, M_NFSDSTATE);
2678                         printf("nfsd openctrl unexpected client cnfl\n");
2679                         goto out;
2680                     }
2681                 }
2682         }
2683
2684         /*
2685          * Check for a conflicting delegation. If one is found, call
2686          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2687          * been set yet, it will get the lock. Otherwise, it will recall
2688          * the delegation. Then, we try try again...
2689          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2690          *  isn't a conflict.)
2691          * I currently believe the conflict algorithm to be:
2692          * For Open with Read Access and Deny None
2693          * - there is a conflict iff a different client has a write delegation
2694          * For Open with other Write Access or any Deny except None
2695          * - there is a conflict if a different client has any delegation
2696          * - there is a conflict if the same client has a read delegation
2697          *   (The current consensus is that this last case should be
2698          *    considered a conflict since the client with a read delegation
2699          *    could have done an Open with ReadAccess and WriteDeny
2700          *    locally and then not have checked for the WriteDeny.)
2701          */
2702         if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2703             stp = LIST_FIRST(&lfp->lf_deleg);
2704             while (stp != LIST_END(&lfp->lf_deleg)) {
2705                 nstp = LIST_NEXT(stp, ls_file);
2706                 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2707                         writedeleg = 0;
2708                 else
2709                         delegate = 0;
2710                 if ((readonly && stp->ls_clp != clp &&
2711                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2712                     (!readonly && (stp->ls_clp != clp ||
2713                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2714                     if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2715                         delegate = 2;
2716                     } else {
2717                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2718                         if (ret) {
2719                             /*
2720                              * nfsrv_delegconflict() unlocks state
2721                              * when it returns non-zero.
2722                              */
2723                             printf("Nfsd openctrl unexpected deleg cnfl\n");
2724                             free((caddr_t)new_open, M_NFSDSTATE);
2725                             free((caddr_t)new_deleg, M_NFSDSTATE);
2726                             if (ret == -1) {
2727                                 openstp = NULL;
2728                                 goto tryagain;
2729                             }
2730                             error = ret;
2731                             goto out;
2732                         }
2733                     }
2734                 }
2735                 stp = nstp;
2736             }
2737         }
2738
2739         /*
2740          * We only get here if there was no open that conflicted.
2741          * If an open for the owner exists, or in the access/deny bits.
2742          * Otherwise it is a new open. If the open_owner hasn't been
2743          * confirmed, replace the open with the new one needing confirmation,
2744          * otherwise add the open.
2745          */
2746         if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2747             /*
2748              * Handle NFSLCK_DELEGPREV by searching the old delegations for
2749              * a match. If found, just move the old delegation to the current
2750              * delegation list and issue open. If not found, return
2751              * NFSERR_EXPIRED.
2752              */
2753             LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2754                 if (stp->ls_lfp == lfp) {
2755                     /* Found it */
2756                     if (stp->ls_clp != clp)
2757                         panic("olddeleg clp");
2758                     LIST_REMOVE(stp, ls_list);
2759                     LIST_REMOVE(stp, ls_hash);
2760                     stp->ls_flags &= ~NFSLCK_OLDDELEG;
2761                     stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2762                     stp->ls_stateid.other[0] = delegstateidp->other[0] =
2763                         clp->lc_clientid.lval[0];
2764                     stp->ls_stateid.other[1] = delegstateidp->other[1] =
2765                         clp->lc_clientid.lval[1];
2766                     stp->ls_stateid.other[2] = delegstateidp->other[2] =
2767                         nfsrv_nextstateindex(clp);
2768                     stp->ls_compref = nd->nd_compref;
2769                     LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2770                     LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2771                         stp->ls_stateid), stp, ls_hash);
2772                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
2773                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2774                     else
2775                         *rflagsp |= NFSV4OPEN_READDELEGATE;
2776                     clp->lc_delegtime = NFSD_MONOSEC +
2777                         nfsrv_lease + NFSRV_LEASEDELTA;
2778
2779                     /*
2780                      * Now, do the associated open.
2781                      */
2782                     new_open->ls_stateid.seqid = 1;
2783                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2784                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2785                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2786                     new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2787                         NFSLCK_OPEN;
2788                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
2789                         new_open->ls_flags |= (NFSLCK_READACCESS |
2790                             NFSLCK_WRITEACCESS);
2791                     else
2792                         new_open->ls_flags |= NFSLCK_READACCESS;
2793                     new_open->ls_uid = new_stp->ls_uid;
2794                     new_open->ls_lfp = lfp;
2795                     new_open->ls_clp = clp;
2796                     LIST_INIT(&new_open->ls_open);
2797                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2798                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2799                         new_open, ls_hash);
2800                     /*
2801                      * and handle the open owner
2802                      */
2803                     if (ownerstp) {
2804                         new_open->ls_openowner = ownerstp;
2805                         LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2806                     } else {
2807                         new_open->ls_openowner = new_stp;
2808                         new_stp->ls_flags = 0;
2809                         nfsrvd_refcache(new_stp->ls_op);
2810                         new_stp->ls_noopens = 0;
2811                         LIST_INIT(&new_stp->ls_open);
2812                         LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2813                         LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2814                         *new_stpp = NULL;
2815                         newnfsstats.srvopenowners++;
2816                         nfsrv_openpluslock++;
2817                     }
2818                     openstp = new_open;
2819                     new_open = NULL;
2820                     newnfsstats.srvopens++;
2821                     nfsrv_openpluslock++;
2822                     break;
2823                 }
2824             }
2825             if (stp == LIST_END(&clp->lc_olddeleg))
2826                 error = NFSERR_EXPIRED;
2827         } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2828             /*
2829              * Scan to see that no delegation for this client and file
2830              * doesn't already exist.
2831              * There also shouldn't yet be an Open for this file and
2832              * openowner.
2833              */
2834             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2835                 if (stp->ls_clp == clp)
2836                     break;
2837             }
2838             if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2839                 /*
2840                  * This is the Claim_Previous case with a delegation
2841                  * type != Delegate_None.
2842                  */
2843                 /*
2844                  * First, add the delegation. (Although we must issue the
2845                  * delegation, we can also ask for an immediate return.)
2846                  */
2847                 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2848                 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2849                     clp->lc_clientid.lval[0];
2850                 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2851                     clp->lc_clientid.lval[1];
2852                 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2853                     nfsrv_nextstateindex(clp);
2854                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2855                     new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2856                         NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2857                     *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2858                 } else {
2859                     new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2860                         NFSLCK_READACCESS);
2861                     *rflagsp |= NFSV4OPEN_READDELEGATE;
2862                 }
2863                 new_deleg->ls_uid = new_stp->ls_uid;
2864                 new_deleg->ls_lfp = lfp;
2865                 new_deleg->ls_clp = clp;
2866                 new_deleg->ls_filerev = filerev;
2867                 new_deleg->ls_compref = nd->nd_compref;
2868                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2869                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2870                     new_deleg->ls_stateid), new_deleg, ls_hash);
2871                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2872                 new_deleg = NULL;
2873                 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2874                     (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2875                      LCL_CALLBACKSON ||
2876                     NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2877                     !NFSVNO_DELEGOK(vp))
2878                     *rflagsp |= NFSV4OPEN_RECALL;
2879                 newnfsstats.srvdelegates++;
2880                 nfsrv_openpluslock++;
2881                 nfsrv_delegatecnt++;
2882
2883                 /*
2884                  * Now, do the associated open.
2885                  */
2886                 new_open->ls_stateid.seqid = 1;
2887                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2888                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2889                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2890                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2891                     NFSLCK_OPEN;
2892                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2893                         new_open->ls_flags |= (NFSLCK_READACCESS |
2894                             NFSLCK_WRITEACCESS);
2895                 else
2896                         new_open->ls_flags |= NFSLCK_READACCESS;
2897                 new_open->ls_uid = new_stp->ls_uid;
2898                 new_open->ls_lfp = lfp;
2899                 new_open->ls_clp = clp;
2900                 LIST_INIT(&new_open->ls_open);
2901                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2902                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2903                    new_open, ls_hash);
2904                 /*
2905                  * and handle the open owner
2906                  */
2907                 if (ownerstp) {
2908                     new_open->ls_openowner = ownerstp;
2909                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2910                 } else {
2911                     new_open->ls_openowner = new_stp;
2912                     new_stp->ls_flags = 0;
2913                     nfsrvd_refcache(new_stp->ls_op);
2914                     new_stp->ls_noopens = 0;
2915                     LIST_INIT(&new_stp->ls_open);
2916                     LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2917                     LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2918                     *new_stpp = NULL;
2919                     newnfsstats.srvopenowners++;
2920                     nfsrv_openpluslock++;
2921                 }
2922                 openstp = new_open;
2923                 new_open = NULL;
2924                 newnfsstats.srvopens++;
2925                 nfsrv_openpluslock++;
2926             } else {
2927                 error = NFSERR_RECLAIMCONFLICT;
2928             }
2929         } else if (ownerstp) {
2930                 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2931                     /* Replace the open */
2932                     if (ownerstp->ls_op)
2933                         nfsrvd_derefcache(ownerstp->ls_op);
2934                     ownerstp->ls_op = new_stp->ls_op;
2935                     nfsrvd_refcache(ownerstp->ls_op);
2936                     ownerstp->ls_seq = new_stp->ls_seq;
2937                     *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2938                     stp = LIST_FIRST(&ownerstp->ls_open);
2939                     stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2940                         NFSLCK_OPEN;
2941                     stp->ls_stateid.seqid = 1;
2942                     stp->ls_uid = new_stp->ls_uid;
2943                     if (lfp != stp->ls_lfp) {
2944                         LIST_REMOVE(stp, ls_file);
2945                         LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2946                         stp->ls_lfp = lfp;
2947                     }
2948                     openstp = stp;
2949                 } else if (openstp) {
2950                     openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2951                     openstp->ls_stateid.seqid++;
2952                     if ((nd->nd_flag & ND_NFSV41) != 0 &&
2953                         openstp->ls_stateid.seqid == 0)
2954                         openstp->ls_stateid.seqid = 1;
2955
2956                     /*
2957                      * This is where we can choose to issue a delegation.
2958                      */
2959                     if (delegate == 0 || writedeleg == 0 ||
2960                         NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
2961                         nfsrv_writedelegifpos == 0) ||
2962                         !NFSVNO_DELEGOK(vp) ||
2963                         (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
2964                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2965                          LCL_CALLBACKSON)
2966                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
2967                     else if (nfsrv_issuedelegs == 0 ||
2968                         NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
2969                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
2970                     else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
2971                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
2972                     else {
2973                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2974                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
2975                             = clp->lc_clientid.lval[0];
2976                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
2977                             = clp->lc_clientid.lval[1];
2978                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
2979                             = nfsrv_nextstateindex(clp);
2980                         new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2981                             NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2982                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2983                         new_deleg->ls_uid = new_stp->ls_uid;
2984                         new_deleg->ls_lfp = lfp;
2985                         new_deleg->ls_clp = clp;
2986                         new_deleg->ls_filerev = filerev;
2987                         new_deleg->ls_compref = nd->nd_compref;
2988                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2989                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2990                             new_deleg->ls_stateid), new_deleg, ls_hash);
2991                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2992                         new_deleg = NULL;
2993                         newnfsstats.srvdelegates++;
2994                         nfsrv_openpluslock++;
2995                         nfsrv_delegatecnt++;
2996                     }
2997                 } else {
2998                     new_open->ls_stateid.seqid = 1;
2999                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3000                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3001                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3002                     new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3003                         NFSLCK_OPEN;
3004                     new_open->ls_uid = new_stp->ls_uid;
3005                     new_open->ls_openowner = ownerstp;
3006                     new_open->ls_lfp = lfp;
3007                     new_open->ls_clp = clp;
3008                     LIST_INIT(&new_open->ls_open);
3009                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3010                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3011                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3012                         new_open, ls_hash);
3013                     openstp = new_open;
3014                     new_open = NULL;
3015                     newnfsstats.srvopens++;
3016                     nfsrv_openpluslock++;
3017
3018                     /*
3019                      * This is where we can choose to issue a delegation.
3020                      */
3021                     if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
3022                         !NFSVNO_DELEGOK(vp) ||
3023                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3024                          LCL_CALLBACKSON)
3025                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
3026                     else if (nfsrv_issuedelegs == 0 ||
3027                         NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3028                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
3029                     else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3030                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
3031                     else {
3032                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3033                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3034                             = clp->lc_clientid.lval[0];
3035                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3036                             = clp->lc_clientid.lval[1];
3037                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3038                             = nfsrv_nextstateindex(clp);
3039                         if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3040                             (nfsrv_writedelegifpos || !readonly) &&
3041                             (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3042                             new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3043                                 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3044                             *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3045                         } else {
3046                             new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3047                                 NFSLCK_READACCESS);
3048                             *rflagsp |= NFSV4OPEN_READDELEGATE;
3049                         }
3050                         new_deleg->ls_uid = new_stp->ls_uid;
3051                         new_deleg->ls_lfp = lfp;
3052                         new_deleg->ls_clp = clp;
3053                         new_deleg->ls_filerev = filerev;
3054                         new_deleg->ls_compref = nd->nd_compref;
3055                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3056                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3057                             new_deleg->ls_stateid), new_deleg, ls_hash);
3058                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3059                         new_deleg = NULL;
3060                         newnfsstats.srvdelegates++;
3061                         nfsrv_openpluslock++;
3062                         nfsrv_delegatecnt++;
3063                     }
3064                 }
3065         } else {
3066                 /*
3067                  * New owner case. Start the open_owner sequence with a
3068                  * Needs confirmation (unless a reclaim) and hang the
3069                  * new open off it.
3070                  */
3071                 new_open->ls_stateid.seqid = 1;
3072                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3073                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3074                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3075                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3076                     NFSLCK_OPEN;
3077                 new_open->ls_uid = new_stp->ls_uid;
3078                 LIST_INIT(&new_open->ls_open);
3079                 new_open->ls_openowner = new_stp;
3080                 new_open->ls_lfp = lfp;
3081                 new_open->ls_clp = clp;
3082                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3083                 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3084                         new_stp->ls_flags = 0;
3085                 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
3086                         /* NFSv4.1 never needs confirmation. */
3087                         new_stp->ls_flags = 0;
3088
3089                         /*
3090                          * This is where we can choose to issue a delegation.
3091                          */
3092                         if (delegate && nfsrv_issuedelegs &&
3093                             (writedeleg || readonly) &&
3094                             (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3095                              LCL_CALLBACKSON &&
3096                             !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3097                             NFSVNO_DELEGOK(vp) &&
3098                             ((nd->nd_flag & ND_NFSV41) == 0 ||
3099                              (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3100                                 new_deleg->ls_stateid.seqid =
3101                                     delegstateidp->seqid = 1;
3102                                 new_deleg->ls_stateid.other[0] =
3103                                     delegstateidp->other[0]
3104                                     = clp->lc_clientid.lval[0];
3105                                 new_deleg->ls_stateid.other[1] =
3106                                     delegstateidp->other[1]
3107                                     = clp->lc_clientid.lval[1];
3108                                 new_deleg->ls_stateid.other[2] =
3109                                     delegstateidp->other[2]
3110                                     = nfsrv_nextstateindex(clp);
3111                                 if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3112                                     (nfsrv_writedelegifpos || !readonly) &&
3113                                     ((nd->nd_flag & ND_NFSV41) == 0 ||
3114                                      (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3115                                      0)) {
3116                                         new_deleg->ls_flags =
3117                                             (NFSLCK_DELEGWRITE |
3118                                              NFSLCK_READACCESS |
3119                                              NFSLCK_WRITEACCESS);
3120                                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3121                                 } else {
3122                                         new_deleg->ls_flags =
3123                                             (NFSLCK_DELEGREAD |
3124                                              NFSLCK_READACCESS);
3125                                         *rflagsp |= NFSV4OPEN_READDELEGATE;
3126                                 }
3127                                 new_deleg->ls_uid = new_stp->ls_uid;
3128                                 new_deleg->ls_lfp = lfp;
3129                                 new_deleg->ls_clp = clp;
3130                                 new_deleg->ls_filerev = filerev;
3131                                 new_deleg->ls_compref = nd->nd_compref;
3132                                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3133                                     ls_file);
3134                                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3135                                     new_deleg->ls_stateid), new_deleg, ls_hash);
3136                                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3137                                     ls_list);
3138                                 new_deleg = NULL;
3139                                 newnfsstats.srvdelegates++;
3140                                 nfsrv_openpluslock++;
3141                                 nfsrv_delegatecnt++;
3142                         }
3143                 } else {
3144                         *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3145                         new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3146                 }
3147                 nfsrvd_refcache(new_stp->ls_op);
3148                 new_stp->ls_noopens = 0;
3149                 LIST_INIT(&new_stp->ls_open);
3150                 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3151                 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3152                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3153                     new_open, ls_hash);
3154                 openstp = new_open;
3155                 new_open = NULL;
3156                 *new_stpp = NULL;
3157                 newnfsstats.srvopens++;
3158                 nfsrv_openpluslock++;
3159                 newnfsstats.srvopenowners++;
3160                 nfsrv_openpluslock++;
3161         }
3162         if (!error) {
3163                 stateidp->seqid = openstp->ls_stateid.seqid;
3164                 stateidp->other[0] = openstp->ls_stateid.other[0];
3165                 stateidp->other[1] = openstp->ls_stateid.other[1];
3166                 stateidp->other[2] = openstp->ls_stateid.other[2];
3167         }
3168         NFSUNLOCKSTATE();
3169         if (haslock) {
3170                 NFSLOCKV4ROOTMUTEX();
3171                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3172                 NFSUNLOCKV4ROOTMUTEX();
3173         }
3174         if (new_open)
3175                 FREE((caddr_t)new_open, M_NFSDSTATE);
3176         if (new_deleg)
3177                 FREE((caddr_t)new_deleg, M_NFSDSTATE);
3178
3179 out:
3180         NFSEXITCODE2(error, nd);
3181         return (error);
3182 }
3183
3184 /*
3185  * Open update. Does the confirm, downgrade and close.
      *
      * The operation is selected by new_stp->ls_flags: NFSLCK_CONFIRM
      * confirms the open, NFSLCK_CLOSE frees it, and anything else is handled
      * as a downgrade of the open's share bits.
      *
      * The open is located via clientid and new_stp->ls_stateid. Once the
      * sanity checks pass, *stateidp is filled in with the open's stateid
      * with the seqid advanced by one.
      *
      * Returns 0 on success, otherwise the error produced by the checks
      * below (NFSERR_BADSTATEID, NFSERR_OLDSTATEID, NFSERR_ISDIR,
      * NFSERR_INVAL or whatever nfsrv_checkrestart(), nfsrv_getclient(),
      * nfsrv_getstate() or nfsrv_checkseqid() returned).
      *
      * The state lock (NFSLOCKSTATE) is taken and released inside this
      * function on every path after the restart check.
3186  */
3187 APPLESTATIC int
3188 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3189     nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3190 {
3191         struct nfsstate *stp, *ownerstp;
3192         struct nfsclient *clp;
3193         struct nfslockfile *lfp;
3194         u_int32_t bits;
3195         int error = 0, gotstate = 0, len = 0;
3196         u_char client[NFSV4_OPAQUELIMIT];
3197
3198         /*
3199          * Check for restart conditions (client and server).
3200          */
3201         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3202             &new_stp->ls_stateid, 0);
3203         if (error)
3204                 goto out;
3205
3206         NFSLOCKSTATE();
3207         /*
3208          * Get the open structure via clientid and stateid.
3209          */
3210         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3211             (nfsquad_t)((u_quad_t)0), 0, nd, p);
3212         if (!error)
3213                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3214                     new_stp->ls_flags, &stp);
3215
3216         /*
3217          * Sanity check the open.
              * The state found must be an open, and a Confirm is accepted
              * exactly when the open owner is still flagged
              * NFSLCK_NEEDSCONFIRM; every other combination is a bad stateid.
3218          */
3219         if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3220                 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3221                  (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3222                 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3223                  (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3224                 error = NFSERR_BADSTATEID;
3225
3226         if (!error)
3227                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3228                     stp->ls_openowner, new_stp->ls_op);
             /*
              * A stateid seqid that differs from the open's is tolerated only
              * for a Confirm when not NFSv4.1, and only when the client sent
              * a seqid of 0 when NFSv4.1.
              */
3229         if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3230             (((nd->nd_flag & ND_NFSV41) == 0 &&
3231               !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3232              ((nd->nd_flag & ND_NFSV41) != 0 &&
3233               new_stp->ls_stateid.seqid != 0)))
3234                 error = NFSERR_OLDSTATEID;
             /* Anything other than a regular file is rejected. */
3235         if (!error && vnode_vtype(vp) != VREG) {
3236                 if (vnode_vtype(vp) == VDIR)
3237                         error = NFSERR_ISDIR;
3238                 else
3239                         error = NFSERR_INVAL;
3240         }
3241
3242         if (error) {
3243                 /*
3244                  * If a client tries to confirm an Open with a bad
3245                  * seqid# and there are no byte range locks or other Opens
3246                  * on the openowner, just throw it away, so the next use of the
3247                  * openowner will start a fresh seq#.
3248                  */
3249                 if (error == NFSERR_BADSEQID &&
3250                     (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3251                     nfsrv_nootherstate(stp))
3252                         nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3253                 NFSUNLOCKSTATE();
3254                 goto out;
3255         }
3256
3257         /*
3258          * Set the return stateid.
              * The seqid returned is the open's current seqid plus one; for
              * NFSv4.1 a result of 0 is replaced by 1.
3259          */
3260         stateidp->seqid = stp->ls_stateid.seqid + 1;
3261         if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3262                 stateidp->seqid = 1;
3263         stateidp->other[0] = stp->ls_stateid.other[0];
3264         stateidp->other[1] = stp->ls_stateid.other[1];
3265         stateidp->other[2] = stp->ls_stateid.other[2];
3266         /*
3267          * Now, handle the three cases.
3268          */
3269         if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3270                 /*
3271                  * If the open doesn't need confirmation, it seems to me that
3272                  * there is a client error, but I'll just log it and keep going?
                      *
                      * NOTE(review): the sanity check above already fails a
                      * Confirm with NFSERR_BADSTATEID when the open owner lacks
                      * NFSLCK_NEEDSCONFIRM, so this message appears to be
                      * unreachable - confirm.
3273                  */
3274                 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3275                         printf("Nfsv4d: stray open confirm\n");
3276                 stp->ls_openowner->ls_flags = 0;
3277                 stp->ls_stateid.seqid++;
3278                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3279                     stp->ls_stateid.seqid == 0)
3280                         stp->ls_stateid.seqid = 1;
                     /*
                      * The first time through for this client, copy its id
                      * into the local buffer while the state lock is held, so
                      * that the stable storage record can be written after the
                      * lock has been released.
                      * NOTE(review): assumes lc_idlen never exceeds
                      * NFSV4_OPAQUELIMIT, the size of client[] - confirm.
                      */
3281                 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3282                         clp->lc_flags |= LCL_STAMPEDSTABLE;
3283                         len = clp->lc_idlen;
3284                         NFSBCOPY(clp->lc_id, client, len);
3285                         gotstate = 1;
3286                 }
3287                 NFSUNLOCKSTATE();
3288         } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
                     /*
                      * NOTE(review): ownerstp is assigned here but is not
                      * referenced again in this function.
                      */
3289                 ownerstp = stp->ls_openowner;
3290                 lfp = stp->ls_lfp;
                     /*
                      * When local locking is enabled and the open still has
                      * entries on its ls_open list, take the lf lock, then drop
                      * the state lock and the vnode lock before calling
                      * nfsrv_freeopen() with the vnode. The lf lock is released
                      * here only when nfsrv_freeopen() returns 0.
                      * NOTE(review): presumably a non-zero return means the
                      * nfslockfile no longer exists - confirm against
                      * nfsrv_freeopen().
                      * The vnode is relocked exclusively before returning.
                      */
3291                 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3292                         /* Get the lf lock */
3293                         nfsrv_locklf(lfp);
3294                         NFSUNLOCKSTATE();
3295                         ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3296                         NFSVOPUNLOCK(vp, 0);
3297                         if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3298                                 NFSLOCKSTATE();
3299                                 nfsrv_unlocklf(lfp);
3300                                 NFSUNLOCKSTATE();
3301                         }
3302                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3303                 } else {
3304                         (void) nfsrv_freeopen(stp, NULL, 0, p);
3305                         NFSUNLOCKSTATE();
3306                 }
3307         } else {
3308                 /*
3309                  * Update the share bits, making sure that the new set are a
3310                  * subset of the old ones.
                      * Any requested bit that is not already set in the open
                      * makes the request fail with NFSERR_INVAL.
3311                  */
3312                 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3313                 if (~(stp->ls_flags) & bits) {
3314                         NFSUNLOCKSTATE();
3315                         error = NFSERR_INVAL;
3316                         goto out;
3317                 }
3318                 stp->ls_flags = (bits | NFSLCK_OPEN);
3319                 stp->ls_stateid.seqid++;
3320                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3321                     stp->ls_stateid.seqid == 0)
3322                         stp->ls_stateid.seqid = 1;
3323                 NFSUNLOCKSTATE();
3324         }
3325
3326         /*
3327          * If the client just confirmed its first open, write a timestamp
3328          * to the stable storage file.
              * This is done with the state lock already released, using the
              * copy of the client id saved in client[] above.
3329          */
3330         if (gotstate != 0) {
3331                 nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
3332                 nfsrv_backupstable();
3333         }
3334
3335 out:
3336         NFSEXITCODE2(error, nd);
3337         return (error);
3338 }
3339
3340 /*
3341  * Delegation update. Does the purge and return.
      *
      * op selects the operation. For NFSV4OP_DELEGRETURN the delegation named
      * by stateidp is looked up, checked against the file handle of vp and
      * freed. For any other op (the DelegPurge case) the client's
      * lc_olddeleg list is freed.
      *
      * cred is not referenced in this function.
      *
      * Returns 0 on success or an NFSERR_xxx value. The state lock is taken
      * and released inside this function.
3342  */
3343 APPLESTATIC int
3344 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3345     nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3346     NFSPROC_T *p)
3347 {
3348         struct nfsstate *stp;
3349         struct nfsclient *clp;
3350         int error = 0;
3351         fhandle_t fh;
3352
3353         /*
3354          * Do a sanity check against the file handle for DelegReturn.
              * NOTE(review): fh is only filled in when vp is non-NULL, yet it
              * is compared unconditionally for NFSV4OP_DELEGRETURN below;
              * presumably DelegReturn callers always pass a vnode - confirm.
3355          */
3356         if (vp) {
3357                 error = nfsvno_getfh(vp, &fh, p);
3358                 if (error)
3359                         goto out;
3360         }
3361         /*
3362          * Check for restart conditions (client and server).
3363          */
3364         if (op == NFSV4OP_DELEGRETURN)
3365                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3366                         stateidp, 0);
3367         else
3368                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3369                         stateidp, 0);
3370
             /*
              * The state lock is taken even when the restart check failed;
              * each exit path below releases it.
              */
3371         NFSLOCKSTATE();
3372         /*
3373          * Get the client structure via clientid. For DelegReturn the
              * delegation itself is then looked up via stateid below.
3374          */
3375         if (!error)
3376             error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3377                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3378         if (error) {
                     /*
                      * NFSERR_CBPATHDOWN is ignored here, and for DelegReturn
                      * a stale clientid is reported as a stale stateid.
                      */
3379                 if (error == NFSERR_CBPATHDOWN)
3380                         error = 0;
3381                 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3382                         error = NFSERR_STALESTATEID;
3383         }
             /*
              * For DelegReturn, a seqid that differs from the delegation's is
              * an old stateid unless this is NFSv4.1 and the client sent 0.
              */
3384         if (!error && op == NFSV4OP_DELEGRETURN) {
3385             error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3386             if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3387                 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3388                 error = NFSERR_OLDSTATEID;
3389         }
3390         /*
3391          * NFSERR_EXPIRED means that the state has gone away,
3392          * so Delegations have been purged. Just return ok.
3393          */
3394         if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3395                 NFSUNLOCKSTATE();
3396                 error = 0;
3397                 goto out;
3398         }
3399         if (error) {
3400                 NFSUNLOCKSTATE();
3401                 goto out;
3402         }
3403
3404         if (op == NFSV4OP_DELEGRETURN) {
                     /*
                      * The delegation must belong to the file the request was
                      * made against; otherwise the stateid is rejected.
                      */
3405                 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3406                     sizeof (fhandle_t))) {
3407                         NFSUNLOCKSTATE();
3408                         error = NFSERR_BADSTATEID;
3409                         goto out;
3410                 }
3411                 nfsrv_freedeleg(stp);
3412         } else {
3413                 nfsrv_freedeleglist(&clp->lc_olddeleg);
3414         }
3415         NFSUNLOCKSTATE();
3416         error = 0;
3417
3418 out:
3419         NFSEXITCODE(error);
3420         return (error);
3421 }
3422
3423 /*
3424  * Release lock owner.
      *
      * Walks every open owner (clp->lc_open), every open hanging off it and
      * every entry on that open's ls_open list for the client identified by
      * clientid. Each entry whose owner name matches new_stp->ls_owner (same
      * length and bytes) is freed via nfsrv_freelockowner(), provided its
      * ls_lock list is empty.
      *
      * Returns 0 on success, the error from nfsrv_checkrestart() or
      * nfsrv_getclient(), or NFSERR_LOCKSHELD as soon as a matching owner
      * with a non-empty ls_lock list is found. In the NFSERR_LOCKSHELD case
      * any matching owners visited earlier in the walk have already been
      * freed.
3425  */
3426 APPLESTATIC int
3427 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3428     NFSPROC_T *p)
3429 {
3430         struct nfsstate *stp, *nstp, *openstp, *ownstp;
3431         struct nfsclient *clp;
3432         int error = 0;
3433
3434         /*
3435          * Check for restart conditions (client and server).
3436          */
3437         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3438             &new_stp->ls_stateid, 0);
3439         if (error)
3440                 goto out;
3441
3442         NFSLOCKSTATE();
3443         /*
3444          * Get the lock owner by name.
              * The client is found first; the owner name comparison is done
              * in the loops below.
3445          */
3446         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3447             (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3448         if (error) {
3449                 NFSUNLOCKSTATE();
3450                 goto out;
3451         }
3452         LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3453             LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3454                 stp = LIST_FIRST(&openstp->ls_open);
3455                 while (stp != LIST_END(&openstp->ls_open)) {
                         /*
                          * Fetch the successor before stp is possibly freed
                          * below, so the walk can continue.
                          */
3456                     nstp = LIST_NEXT(stp, ls_list);
3457                     /*
3458                      * If the owner matches, check for locks and
3459                      * then free or return an error.
3460                      */
3461                     if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3462                         !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3463                          stp->ls_ownerlen)){
3464                         if (LIST_EMPTY(&stp->ls_lock)) {
3465                             nfsrv_freelockowner(stp, NULL, 0, p);
3466                         } else {
3467                             NFSUNLOCKSTATE();
3468                             error = NFSERR_LOCKSHELD;
3469                             goto out;
3470                         }
3471                     }
3472                     stp = nstp;
3473                 }
3474             }
3475         }
3476         NFSUNLOCKSTATE();
3477
3478 out:
3479         NFSEXITCODE(error);
3480         return (error);
3481 }
3482
3483 /*
3484  * Get the file handle for a lock structure.
3485  */
3486 static int
3487 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3488     fhandle_t *nfhp, NFSPROC_T *p)
3489 {
3490         fhandle_t *fhp = NULL;
3491         int error;
3492
3493         /*
3494          * For lock, use the new nfslock structure, otherwise just
3495          * a fhandle_t on the stack.
3496          */
3497         if (flags & NFSLCK_OPEN) {
3498                 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3499                 fhp = &new_lfp->lf_fh;
3500         } else if (nfhp) {
3501                 fhp = nfhp;
3502         } else {
3503                 panic("nfsrv_getlockfh");
3504         }
3505         error = nfsvno_getfh(vp, fhp, p);
3506         NFSEXITCODE(error);
3507         return (error);
3508 }
3509
3510 /*
3511  * Get an nfs lock structure. Allocate one, as required, and return a
3512  * pointer to it.
3513  * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3514  */
3515 static int
3516 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3517     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3518 {
3519         struct nfslockfile *lfp;
3520         fhandle_t *fhp = NULL, *tfhp;
3521         struct nfslockhashhead *hp;
3522         struct nfslockfile *new_lfp = NULL;
3523
3524         /*
3525          * For lock, use the new nfslock structure, otherwise just
3526          * a fhandle_t on the stack.
3527          */
3528         if (flags & NFSLCK_OPEN) {
3529                 new_lfp = *new_lfpp;
3530                 fhp = &new_lfp->lf_fh;
3531         } else if (nfhp) {
3532                 fhp = nfhp;
3533         } else {
3534                 panic("nfsrv_getlockfile");
3535         }
3536
3537         hp = NFSLOCKHASH(fhp);
3538         LIST_FOREACH(lfp, hp, lf_hash) {
3539                 tfhp = &lfp->lf_fh;
3540                 if (NFSVNO_CMPFH(fhp, tfhp)) {
3541                         if (lockit)
3542                                 nfsrv_locklf(lfp);
3543                         *lfpp = lfp;
3544                         return (0);
3545                 }
3546         }
3547         if (!(flags & NFSLCK_OPEN))
3548                 return (-1);
3549
3550         /*
3551          * No match, so chain the new one into the list.
3552          */
3553         LIST_INIT(&new_lfp->lf_open);
3554         LIST_INIT(&new_lfp->lf_lock);
3555         LIST_INIT(&new_lfp->lf_deleg);
3556         LIST_INIT(&new_lfp->lf_locallock);
3557         LIST_INIT(&new_lfp->lf_rollback);
3558         new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3559         new_lfp->lf_locallock_lck.nfslock_lock = 0;
3560         new_lfp->lf_usecount = 0;
3561         LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3562         *lfpp = new_lfp;
3563         *new_lfpp = NULL;
3564         return (0);
3565 }
3566
3567 /*
3568  * This function adds a nfslock lock structure to the list for the associated
3569  * nfsstate and nfslockfile structures. It will be inserted after the
3570  * entry pointed at by insert_lop.
3571  */
3572 static void
3573 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3574     struct nfsstate *stp, struct nfslockfile *lfp)
3575 {
3576         struct nfslock *lop, *nlop;
3577
3578         new_lop->lo_stp = stp;
3579         new_lop->lo_lfp = lfp;
3580
3581         if (stp != NULL) {
3582                 /* Insert in increasing lo_first order */
3583                 lop = LIST_FIRST(&lfp->lf_lock);
3584                 if (lop == LIST_END(&lfp->lf_lock) ||
3585                     new_lop->lo_first <= lop->lo_first) {
3586                         LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3587                 } else {
3588                         nlop = LIST_NEXT(lop, lo_lckfile);
3589                         while (nlop != LIST_END(&lfp->lf_lock) &&
3590                                nlop->lo_first < new_lop->lo_first) {
3591                                 lop = nlop;
3592                                 nlop = LIST_NEXT(lop, lo_lckfile);
3593                         }
3594                         LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3595                 }
3596         } else {
3597                 new_lop->lo_lckfile.le_prev = NULL;     /* list not used */
3598         }
3599
3600         /*
3601          * Insert after insert_lop, which is overloaded as stp or lfp for
3602          * an empty list.
3603          */
3604         if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3605                 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3606         else if ((struct nfsstate *)insert_lop == stp)
3607                 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3608         else
3609                 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3610         if (stp != NULL) {
3611                 newnfsstats.srvlocks++;
3612                 nfsrv_openpluslock++;
3613         }
3614 }
3615
3616 /*
3617  * This function updates the locking for a lock owner and given file. It
3618  * maintains a list of lock ranges ordered on increasing file offset that
3619  * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3620  * It always adds new_lop to the list and sometimes uses the one pointed
3621  * at by other_lopp.
      *
      * Arguments:
      *  stp        - owner whose ls_lock list is updated; when NULL the
      *               lfp->lf_locallock list is updated instead.
      *  new_lopp   - the new lock or unlock range. *new_lopp is set to NULL
      *               when the structure has been linked into a list.
      *  other_lopp - spare structure used when the new range splits an
      *               existing lock in two; *other_lopp is set to NULL in
      *               that case.
      *  lfp        - the file; list entries for other files are skipped.
      *
      * When NFSLCK_UNLOCK is set in new_lop->lo_flags, existing ranges are
      * freed or trimmed but new_lop itself is not inserted.
3622  */
3623 static void
3624 nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3625     struct nfslock **other_lopp, struct nfslockfile *lfp)
3626 {
3627         struct nfslock *new_lop = *new_lopp;
3628         struct nfslock *lop, *tlop, *ilop;
3629         struct nfslock *other_lop = *other_lopp;
3630         int unlock = 0, myfile = 0;
3631         u_int64_t tmp;
3632
3633         /*
3634          * Work down the list until the lock is merged.
3635          */
3636         if (new_lop->lo_flags & NFSLCK_UNLOCK)
3637                 unlock = 1;
             /*
              * ilop is the entry that new_lop will be inserted after. It
              * starts out as the list owner (stp or lfp) cast to a lock
              * pointer, which nfsrv_insertlock() treats as "insert at head".
              */
3638         if (stp != NULL) {
3639                 ilop = (struct nfslock *)stp;
3640                 lop = LIST_FIRST(&stp->ls_lock);
3641         } else {
3642                 ilop = (struct nfslock *)lfp;
3643                 lop = LIST_FIRST(&lfp->lf_locallock);
3644         }
3645         while (lop != NULL) {
3646             /*
3647              * Only check locks for this file that aren't before the start of
3648              * new lock's range.
3649              */
3650             if (lop->lo_lfp == lfp) {
3651               myfile = 1;
3652               if (lop->lo_end >= new_lop->lo_first) {
3653                 if (new_lop->lo_end < lop->lo_first) {
3654                         /*
3655                          * If the new lock ends before the start of the
3656                          * current lock's range, no merge, just insert
3657                          * the new lock.
3658                          */
3659                         break;
3660                 }
3661                 if (new_lop->lo_flags == lop->lo_flags ||
3662                     (new_lop->lo_first <= lop->lo_first &&
3663                      new_lop->lo_end >= lop->lo_end)) {
3664                         /*
3665                          * This lock can be absorbed by the new lock/unlock.
3666                          * This happens when it covers the entire range
3667                          * of the old lock or is contiguous
3668                          * with the old lock and is of the same type or an
3669                          * unlock.
3670                          */
3671                         if (lop->lo_first < new_lop->lo_first)
3672                                 new_lop->lo_first = lop->lo_first;
3673                         if (lop->lo_end > new_lop->lo_end)
3674                                 new_lop->lo_end = lop->lo_end;
                             /* Step to the next entry before freeing this one. */
3675                         tlop = lop;
3676                         lop = LIST_NEXT(lop, lo_lckowner);
3677                         nfsrv_freenfslock(tlop);
3678                         continue;
3679                 }
3680
3681                 /*
3682                  * All these cases are for contiguous locks that are not the
3683                  * same type, so they can't be merged.
3684                  */
3685                 if (new_lop->lo_first <= lop->lo_first) {
3686                         /*
3687                          * This case is where the new lock overlaps with the
3688                          * first part of the old lock. Move the start of the
3689                          * old lock to just past the end of the new lock. The
3690                          * new lock will be inserted in front of the old, since
3691                          * ilop hasn't been updated. (We are done now.)
3692                          */
3693                         lop->lo_first = new_lop->lo_end;
3694                         break;
3695                 }
3696                 if (new_lop->lo_end >= lop->lo_end) {
3697                         /*
3698                          * This case is where the new lock overlaps with the
3699                          * end of the old lock's range. Move the old lock's
3700                          * end to just before the new lock's first and insert
3701                          * the new lock after the old lock.
3702                          * Might not be done yet, since the new lock could
3703                          * overlap further locks with higher ranges.
3704                          */
3705                         lop->lo_end = new_lop->lo_first;
3706                         ilop = lop;
3707                         lop = LIST_NEXT(lop, lo_lckowner);
3708                         continue;
3709                 }
3710                 /*
3711                  * The final case is where the new lock's range is in the
3712                  * middle of the current lock's and splits the current lock
3713                  * up. Use *other_lopp to handle the second part of the
3714                  * split old lock range. (We are done now.)
3715                  * For unlock, we use new_lop as other_lop and tmp, since
3716                  * other_lop and new_lop are the same for this case.
3717                  * We noted the unlock case above, so we don't need
3718                  * new_lop->lo_flags any longer.
3719                  */
3720                 tmp = new_lop->lo_first;
3721                 if (other_lop == NULL) {
3722                         if (!unlock)
3723                                 panic("nfsd srv update unlock");
3724                         other_lop = new_lop;
3725                         *new_lopp = NULL;
3726                 }
3727                 other_lop->lo_first = new_lop->lo_end;
3728                 other_lop->lo_end = lop->lo_end;
3729                 other_lop->lo_flags = lop->lo_flags;
3730                 other_lop->lo_stp = stp;
3731                 other_lop->lo_lfp = lfp;
3732                 lop->lo_end = tmp;
3733                 nfsrv_insertlock(other_lop, lop, stp, lfp);
3734                 *other_lopp = NULL;
3735                 ilop = lop;
3736                 break;
3737               }
3738             }
3739             ilop = lop;
3740             lop = LIST_NEXT(lop, lo_lckowner);
                 /*
                  * Once an entry for this file has been seen, stop at the
                  * end of the list or at the first entry for another file.
                  */
3741             if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3742                 break;
3743         }
3744
3745         /*
3746          * Insert the new lock in the list at the appropriate place.
             * (Nothing is inserted for an unlock.)
3747          */
3748         if (!unlock) {
3749                 nfsrv_insertlock(new_lop, ilop, stp, lfp);
3750                 *new_lopp = NULL;
3751         }
3752 }
3753
3754 /*
3755  * This function handles sequencing of locks, etc.
3756  * It returns an error that indicates what the caller should do.
3757  */
3758 static int
3759 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3760     struct nfsstate *stp, struct nfsrvcache *op)
3761 {
3762         int error = 0;
3763
3764         if ((nd->nd_flag & ND_NFSV41) != 0)
3765                 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
3766                 goto out;
3767         if (op != nd->nd_rp)
3768                 panic("nfsrvstate checkseqid");
3769         if (!(op->rc_flag & RC_INPROG))
3770                 panic("nfsrvstate not inprog");
3771         if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3772                 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3773                 panic("nfsrvstate op refcnt");
3774         }
3775         if ((stp->ls_seq + 1) == seqid) {
3776                 if (stp->ls_op)
3777                         nfsrvd_derefcache(stp->ls_op);
3778                 stp->ls_op = op;
3779                 nfsrvd_refcache(op);
3780                 stp->ls_seq = seqid;
3781                 goto out;
3782         } else if (stp->ls_seq == seqid && stp->ls_op &&
3783                 op->rc_xid == stp->ls_op->rc_xid &&
3784                 op->rc_refcnt == 0 &&
3785                 op->rc_reqlen == stp->ls_op->rc_reqlen &&
3786                 op->rc_cksum == stp->ls_op->rc_cksum) {
3787                 if (stp->ls_op->rc_flag & RC_INPROG) {
3788                         error = NFSERR_DONTREPLY;
3789                         goto out;
3790                 }
3791                 nd->nd_rp = stp->ls_op;
3792                 nd->nd_rp->rc_flag |= RC_INPROG;
3793                 nfsrvd_delcache(op);
3794                 error = NFSERR_REPLYFROMCACHE;
3795                 goto out;
3796         }
3797         error = NFSERR_BADSEQID;
3798
3799 out:
3800         NFSEXITCODE2(error, nd);
3801         return (error);
3802 }
3803
3804 /*
3805  * Get the client ip address for callbacks. If the strings can't be parsed,
3806  * just set lc_program to 0 to indicate no callbacks are possible.
3807  * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3808  *  the address to the client's transport address. This won't be used
3809  *  for callbacks, but can be printed out by newnfsstats for info.)
3810  * Return error if the xdr can't be parsed, 0 otherwise.
3811  */
3812 APPLESTATIC int
3813 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3814 {
3815         u_int32_t *tl;
3816         u_char *cp, *cp2;
3817         int i, j;
3818         struct sockaddr_in *rad, *sad;
3819         u_char protocol[5], addr[24];
3820         int error = 0, cantparse = 0;
3821         union {
3822                 u_long ival;
3823                 u_char cval[4];
3824         } ip;
3825         union {
3826                 u_short sval;
3827                 u_char cval[2];
3828         } port;
3829
3830         rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3831         rad->sin_family = AF_INET;
3832         rad->sin_len = sizeof (struct sockaddr_in);
3833         rad->sin_addr.s_addr = 0;
3834         rad->sin_port = 0;
3835         clp->lc_req.nr_client = NULL;
3836         clp->lc_req.nr_lock = 0;
3837         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3838         i = fxdr_unsigned(int, *tl);
3839         if (i >= 3 && i <= 4) {
3840                 error = nfsrv_mtostr(nd, protocol, i);
3841                 if (error)
3842                         goto nfsmout;
3843                 if (!strcmp(protocol, "tcp")) {
3844                         clp->lc_flags |= LCL_TCPCALLBACK;
3845                         clp->lc_req.nr_sotype = SOCK_STREAM;
3846                         clp->lc_req.nr_soproto = IPPROTO_TCP;
3847                 } else if (!strcmp(protocol, "udp")) {
3848                         clp->lc_req.nr_sotype = SOCK_DGRAM;
3849                         clp->lc_req.nr_soproto = IPPROTO_UDP;
3850                 } else {
3851                         cantparse = 1;
3852                 }
3853         } else {
3854                 cantparse = 1;
3855                 if (i > 0) {
3856                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3857                         if (error)
3858                                 goto nfsmout;
3859                 }
3860         }
3861         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3862         i = fxdr_unsigned(int, *tl);
3863         if (i < 0) {
3864                 error = NFSERR_BADXDR;
3865                 goto nfsmout;
3866         } else if (i == 0) {
3867                 cantparse = 1;
3868         } else if (!cantparse && i <= 23 && i >= 11) {
3869                 error = nfsrv_mtostr(nd, addr, i);
3870                 if (error)
3871                         goto nfsmout;
3872
3873                 /*
3874                  * Parse out the address fields. We expect 6 decimal numbers
3875                  * separated by '.'s.
3876                  */
3877                 cp = addr;
3878                 i = 0;
3879                 while (*cp && i < 6) {
3880                         cp2 = cp;
3881                         while (*cp2 && *cp2 != '.')
3882                                 cp2++;
3883                         if (*cp2)
3884                                 *cp2++ = '\0';
3885                         else if (i != 5) {
3886                                 cantparse = 1;
3887                                 break;
3888                         }
3889                         j = nfsrv_getipnumber(cp);
3890                         if (j >= 0) {
3891                                 if (i < 4)
3892                                         ip.cval[3 - i] = j;
3893                                 else
3894                                         port.cval[5 - i] = j;
3895                         } else {
3896                                 cantparse = 1;
3897                                 break;
3898                         }
3899                         cp = cp2;
3900                         i++;
3901                 }
3902                 if (!cantparse) {
3903                         if (ip.ival != 0x0) {
3904                                 rad->sin_addr.s_addr = htonl(ip.ival);
3905                                 rad->sin_port = htons(port.sval);
3906                         } else {
3907                                 cantparse = 1;
3908                         }
3909                 }
3910         } else {
3911                 cantparse = 1;
3912                 if (i > 0) {
3913                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3914                         if (error)
3915                                 goto nfsmout;
3916                 }
3917         }
3918         if (cantparse) {
3919                 sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3920                 rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3921                 rad->sin_port = 0x0;
3922                 clp->lc_program = 0;
3923         }
3924 nfsmout:
3925         NFSEXITCODE2(error, nd);
3926         return (error);
3927 }
3928
/*
 * Turn a string of one to three decimal digits into a number in the range
 * 0 to 255. Return -1 upon error, which includes an empty string, a
 * non-digit character, more than three digits or a value above 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
        int val = 0, ndigits = 0;

        /* An empty field is not a number. */
        if (*cp == '\0')
                return (-1);

        while (*cp != '\0') {
                if (ndigits > 2 || *cp < '0' || *cp > '9')
                        return (-1);
                val = val * 10 + (*cp - '0');
                cp++;
                ndigits++;
        }
        if (val < 256)
                return (val);
        return (-1);
}
3950
3951 /*
3952  * This function checks for restart conditions.
3953  */
3954 static int
3955 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3956     nfsv4stateid_t *stateidp, int specialid)
3957 {
3958         int ret = 0;
3959
3960         /*
3961          * First check for a server restart. Open, LockT, ReleaseLockOwner
3962          * and DelegPurge have a clientid, the rest a stateid.
3963          */
3964         if (flags &
3965             (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
3966                 if (clientid.lval[0] != nfsrvboottime) {
3967                         ret = NFSERR_STALECLIENTID;
3968                         goto out;
3969                 }
3970         } else if (stateidp->other[0] != nfsrvboottime &&
3971                 specialid == 0) {
3972                 ret = NFSERR_STALESTATEID;
3973                 goto out;
3974         }
3975
3976         /*
3977          * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
3978          * not use a lock/open owner seqid#, so the check can be done now.
3979          * (The others will be checked, as required, later.)
3980          */
3981         if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
3982                 goto out;
3983
3984         NFSLOCKSTATE();
3985         ret = nfsrv_checkgrace(NULL, NULL, flags);
3986         NFSUNLOCKSTATE();
3987
3988 out:
3989         NFSEXITCODE(ret);
3990         return (ret);
3991 }
3992
3993 /*
3994  * Check for grace.
3995  */
3996 static int
3997 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
3998     u_int32_t flags)
3999 {
4000         int error = 0;
4001
4002         if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4003                 if (flags & NFSLCK_RECLAIM) {
4004                         error = NFSERR_NOGRACE;
4005                         goto out;
4006                 }
4007         } else {
4008                 if (!(flags & NFSLCK_RECLAIM)) {
4009                         error = NFSERR_GRACE;
4010                         goto out;
4011                 }
4012                 if (nd != NULL && clp != NULL &&
4013                     (nd->nd_flag & ND_NFSV41) != 0 &&
4014                     (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4015                         error = NFSERR_NOGRACE;
4016                         goto out;
4017                 }
4018
4019                 /*
4020                  * If grace is almost over and we are still getting Reclaims,
4021                  * extend grace a bit.
4022                  */
4023                 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4024                     nfsrv_stablefirst.nsf_eograce)
4025                         nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4026                                 NFSRV_LEASEDELTA;
4027         }
4028
4029 out:
4030         NFSEXITCODE(error);
4031         return (error);
4032 }
4033
4034 /*
4035  * Do a server callback.
4036  */
4037 static int
4038 nfsrv_docallback(struct nfsclient *clp, int procnum,
4039     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
4040     struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
4041 {
4042         mbuf_t m;
4043         u_int32_t *tl;
4044         struct nfsrv_descript nfsd, *nd = &nfsd;
4045         struct ucred *cred;
4046         int error = 0;
4047         u_int32_t callback;
4048         struct nfsdsession *sep = NULL;
4049
4050         cred = newnfs_getcred();
4051         NFSLOCKSTATE(); /* mostly for lc_cbref++ */
4052         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
4053                 NFSUNLOCKSTATE();
4054                 panic("docallb");
4055         }
4056         clp->lc_cbref++;
4057
4058         /*
4059          * Fill the callback program# and version into the request
4060          * structure for newnfs_connect() to use.
4061          */
4062         clp->lc_req.nr_prog = clp->lc_program;
4063 #ifdef notnow
4064         if ((clp->lc_flags & LCL_NFSV41) != 0)
4065                 clp->lc_req.nr_vers = NFSV41_CBVERS;
4066         else
4067 #endif
4068                 clp->lc_req.nr_vers = NFSV4_CBVERS;
4069
4070         /*
4071          * First, fill in some of the fields of nd and cr.
4072          */
4073         nd->nd_flag = ND_NFSV4;
4074         if (clp->lc_flags & LCL_GSS)
4075                 nd->nd_flag |= ND_KERBV;
4076         if ((clp->lc_flags & LCL_NFSV41) != 0)
4077                 nd->nd_flag |= ND_NFSV41;
4078         nd->nd_repstat = 0;
4079         cred->cr_uid = clp->lc_uid;
4080         cred->cr_gid = clp->lc_gid;
4081         callback = clp->lc_callback;
4082         NFSUNLOCKSTATE();
4083         cred->cr_ngroups = 1;
4084
4085         /*
4086          * Get the first mbuf for the request.
4087          */
4088         MGET(m, M_WAITOK, MT_DATA);
4089         mbuf_setlen(m, 0);
4090         nd->nd_mreq = nd->nd_mb = m;
4091         nd->nd_bpos = NFSMTOD(m, caddr_t);
4092         
4093         /*
4094          * and build the callback request.
4095          */
4096         if (procnum == NFSV4OP_CBGETATTR) {
4097                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4098                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
4099                     "CB Getattr", &sep);
4100                 if (error != 0) {
4101                         mbuf_freem(nd->nd_mreq);
4102                         goto errout;
4103                 }
4104                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4105                 (void)nfsrv_putattrbit(nd, attrbitp);
4106         } else if (procnum == NFSV4OP_CBRECALL) {
4107                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4108                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
4109                     "CB Recall", &sep);
4110                 if (error != 0) {
4111                         mbuf_freem(nd->nd_mreq);
4112                         goto errout;
4113                 }
4114                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
4115                 *tl++ = txdr_unsigned(stateidp->seqid);
4116                 NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
4117                     NFSX_STATEIDOTHER);
4118                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
4119                 if (trunc)
4120                         *tl = newnfs_true;
4121                 else
4122                         *tl = newnfs_false;
4123                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4124         } else if (procnum == NFSV4PROC_CBNULL) {
4125                 nd->nd_procnum = NFSV4PROC_CBNULL;
4126                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
4127                         error = nfsv4_getcbsession(clp, &sep);
4128                         if (error != 0) {
4129                                 mbuf_freem(nd->nd_mreq);
4130                                 goto errout;
4131                         }
4132                 }
4133         } else {
4134                 error = NFSERR_SERVERFAULT;
4135                 mbuf_freem(nd->nd_mreq);
4136                 goto errout;
4137         }
4138
4139         /*
4140          * Call newnfs_connect(), as required, and then newnfs_request().
4141          */
4142         (void) newnfs_sndlock(&clp->lc_req.nr_lock);
4143         if (clp->lc_req.nr_client == NULL) {
4144                 if ((clp->lc_flags & LCL_NFSV41) != 0)
4145                         error = ECONNREFUSED;
4146                 else if (nd->nd_procnum == NFSV4PROC_CBNULL)
4147                         error = newnfs_connect(NULL, &clp->lc_req, cred,
4148                             NULL, 1);
4149                 else
4150                         error = newnfs_connect(NULL, &clp->lc_req, cred,
4151                             NULL, 3);
4152         }
4153         newnfs_sndunlock(&clp->lc_req.nr_lock);
4154         if (!error) {
4155                 if ((nd->nd_flag & ND_NFSV41) != 0) {
4156                         KASSERT(sep != NULL, ("sep NULL"));
4157                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4158                             NULL, NULL, cred, clp->lc_program,
4159                             clp->lc_req.nr_vers, NULL, 1, NULL,
4160                             &sep->sess_cbsess);
4161                         nfsrv_freesession(sep, NULL);
4162                 } else
4163                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4164                             NULL, NULL, cred, clp->lc_program,
4165                             clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
4166         }
4167 errout:
4168         NFSFREECRED(cred);
4169
4170         /*
4171          * If error is set here, the Callback path isn't working
4172          * properly, so twiddle the appropriate LCL_ flags.
4173          * (nd_repstat != 0 indicates the Callback path is working,
4174          *  but the callback failed on the client.)
4175          */
4176         if (error) {
4177                 /*
4178                  * Mark the callback pathway down, which disabled issuing
4179                  * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
4180                  */
4181                 NFSLOCKSTATE();
4182                 clp->lc_flags |= LCL_CBDOWN;
4183                 NFSUNLOCKSTATE();
4184         } else {
4185                 /*
4186                  * Callback worked. If the callback path was down, disable
4187                  * callbacks, so no more delegations will be issued. (This
4188                  * is done on the assumption that the callback pathway is
4189                  * flakey.)
4190                  */
4191                 NFSLOCKSTATE();
4192                 if (clp->lc_flags & LCL_CBDOWN)
4193                         clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
4194                 NFSUNLOCKSTATE();
4195                 if (nd->nd_repstat)
4196                         error = nd->nd_repstat;
4197                 else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
4198                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4199                             NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
4200                             p, NULL);
4201                 mbuf_freem(nd->nd_mrep);
4202         }
4203         NFSLOCKSTATE();
4204         clp->lc_cbref--;
4205         if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
4206                 clp->lc_flags &= ~LCL_WAKEUPWANTED;
4207                 wakeup(clp);
4208         }
4209         NFSUNLOCKSTATE();
4210
4211         NFSEXITCODE(error);
4212         return (error);
4213 }
4214
4215 /*
4216  * Set up the compound RPC for the callback.
4217  */
4218 static int
4219 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4220     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4221 {
4222         uint32_t *tl;
4223         int error, len;
4224
4225         len = strlen(optag);
4226         (void)nfsm_strtom(nd, optag, len);
4227         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4228         if ((nd->nd_flag & ND_NFSV41) != 0) {
4229                 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4230                 *tl++ = txdr_unsigned(callback);
4231                 *tl++ = txdr_unsigned(2);
4232                 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4233                 error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4234                 if (error != 0)
4235                         return (error);
4236                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4237                 *tl = txdr_unsigned(op);
4238         } else {
4239                 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4240                 *tl++ = txdr_unsigned(callback);
4241                 *tl++ = txdr_unsigned(1);
4242                 *tl = txdr_unsigned(op);
4243         }
4244         return (0);
4245 }
4246
/*
 * Return the next index# for a clientid. Mostly just increment and return
 * the next one, but... if the 32bit unsigned does actually wrap around,
 * a message is printed, since the server should be rebooted.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 * The wrapped value (0) is still returned to the caller.
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	/* A result of 0 after the increment means the counter wrapped. */
	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4267
4268 /*
4269  * Return the next index# for a stateid. Mostly just increment and return
4270  * the next one, but... if the 32bit unsigned does actually wrap around
4271  * (will a BSD server stay up that long?), find
4272  * new start and end values.
4273  */
4274 static u_int32_t
4275 nfsrv_nextstateindex(struct nfsclient *clp)
4276 {
4277         struct nfsstate *stp;
4278         int i;
4279         u_int32_t canuse, min_index, max_index;
4280
4281         if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4282                 clp->lc_stateindex++;
4283                 if (clp->lc_stateindex != clp->lc_statemaxindex)
4284                         return (clp->lc_stateindex);
4285         }
4286
4287         /*
4288          * Yuck, we've hit the end.
4289          * Look for a new min and max.
4290          */
4291         min_index = 0;
4292         max_index = 0xffffffff;
4293         for (i = 0; i < NFSSTATEHASHSIZE; i++) {
4294             LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4295                 if (stp->ls_stateid.other[2] > 0x80000000) {
4296                     if (stp->ls_stateid.other[2] < max_index)
4297                         max_index = stp->ls_stateid.other[2];
4298                 } else {
4299                     if (stp->ls_stateid.other[2] > min_index)
4300                         min_index = stp->ls_stateid.other[2];
4301                 }
4302             }
4303         }
4304
4305         /*
4306          * Yikes, highly unlikely, but I'll handle it anyhow.
4307          */
4308         if (min_index == 0x80000000 && max_index == 0x80000001) {
4309             canuse = 0;
4310             /*
4311              * Loop around until we find an unused entry. Return that
4312              * and set LCL_INDEXNOTOK, so the search will continue next time.
4313              * (This is one of those rare cases where a goto is the
4314              *  cleanest way to code the loop.)
4315              */
4316 tryagain:
4317             for (i = 0; i < NFSSTATEHASHSIZE; i++) {
4318                 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4319                     if (stp->ls_stateid.other[2] == canuse) {
4320                         canuse++;
4321                         goto tryagain;
4322                     }
4323                 }
4324             }
4325             clp->lc_flags |= LCL_INDEXNOTOK;
4326             return (canuse);
4327         }
4328
4329         /*
4330          * Ok to start again from min + 1.
4331          */
4332         clp->lc_stateindex = min_index + 1;
4333         clp->lc_statemaxindex = max_index;
4334         clp->lc_flags &= ~LCL_INDEXNOTOK;
4335         return (clp->lc_stateindex);
4336 }
4337
4338 /*
4339  * The following functions handle the stable storage file that deals with
4340  * the edge conditions described in RFC3530 Sec. 8.6.3.
4341  * The file is as follows:
4342  * - a single record at the beginning that has the lease time of the
4343  *   previous server instance (before the last reboot) and the nfsrvboottime
4344  *   values for the previous server boots.
4345  *   These previous boot times are used to ensure that the current
4346  *   nfsrvboottime does not, somehow, get set to a previous one.
4347  *   (This is important so that Stale ClientIDs and StateIDs can
4348  *    be recognized.)
4349  *   The number of previous nfsrvboottime values precedes the list.
4350  * - followed by some number of appended records with:
4351  *   - client id string
4352  *   - flag that indicates it is a record revoking state via lease
4353  *     expiration or similar
4354  *     OR has successfully acquired state.
4355  * These structures vary in length, with the client string at the end, up
4356  * to NFSV4_OPAQUELIMIT in size.
4357  *
4358  * At the end of the grace period, the file is truncated, the first
4359  * record is rewritten with updated information and any acquired state
4360  * records for successful reclaims of state are written.
4361  *
4362  * Subsequent records are appended when the first state is issued to
4363  * a client and when state is revoked for a client.
4364  *
4365  * When reading the file in, state issued records that come later in
4366  * the file override older ones, since the append log is in chronological order.
4367  * If, for some reason, the file can't be read, the grace period is
4368  * immediately terminated and all reclaims get NFSERR_NOGRACE.
4369  */
4370
4371 /*
4372  * Read in the stable storage file. Called by nfssvc() before the nfsd
4373  * processes start servicing requests.
4374  */
4375 APPLESTATIC void
4376 nfsrv_setupstable(NFSPROC_T *p)
4377 {
4378         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4379         struct nfsrv_stable *sp, *nsp;
4380         struct nfst_rec *tsp;
4381         int error, i, tryagain;
4382         off_t off = 0;
4383         ssize_t aresid, len;
4384
4385         /*
4386          * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4387          * a reboot, so state has not been lost.
4388          */
4389         if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4390                 return;
4391         /*
4392          * Set Grace over just until the file reads successfully.
4393          */
4394         nfsrvboottime = time_second;
4395         LIST_INIT(&sf->nsf_head);
4396         sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4397         sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4398         if (sf->nsf_fp == NULL)
4399                 return;
4400         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4401             (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4402             0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4403         if (error || aresid || sf->nsf_numboots == 0 ||
4404                 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4405                 return;
4406
4407         /*
4408          * Now, read in the boottimes.
4409          */
4410         sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4411                 sizeof (time_t), M_TEMP, M_WAITOK);
4412         off = sizeof (struct nfsf_rec);
4413         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4414             (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4415             UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4416         if (error || aresid) {
4417                 free((caddr_t)sf->nsf_bootvals, M_TEMP);
4418                 sf->nsf_bootvals = NULL;
4419                 return;
4420         }
4421
4422         /*
4423          * Make sure this nfsrvboottime is different from all recorded
4424          * previous ones.
4425          */
4426         do {
4427                 tryagain = 0;
4428                 for (i = 0; i < sf->nsf_numboots; i++) {
4429                         if (nfsrvboottime == sf->nsf_bootvals[i]) {
4430                                 nfsrvboottime++;
4431                                 tryagain = 1;
4432                                 break;
4433                         }
4434                 }
4435         } while (tryagain);
4436
4437         sf->nsf_flags |= NFSNSF_OK;
4438         off += (sf->nsf_numboots * sizeof (time_t));
4439
4440         /*
4441          * Read through the file, building a list of records for grace
4442          * checking.
4443          * Each record is between sizeof (struct nfst_rec) and
4444          * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4445          * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4446          */
4447         tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4448                 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4449         do {
4450             error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4451                 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4452                 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4453             len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4454             if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4455                 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4456                 /*
4457                  * Yuck, the file has been corrupted, so just return
4458                  * after clearing out any restart state, so the grace period
4459                  * is over.
4460                  */
4461                 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4462                         LIST_REMOVE(sp, nst_list);
4463                         free((caddr_t)sp, M_TEMP);
4464                 }
4465                 free((caddr_t)tsp, M_TEMP);
4466                 sf->nsf_flags &= ~NFSNSF_OK;
4467                 free((caddr_t)sf->nsf_bootvals, M_TEMP);
4468                 sf->nsf_bootvals = NULL;
4469                 return;
4470             }
4471             if (len > 0) {
4472                 off += sizeof (struct nfst_rec) + tsp->len - 1;
4473                 /*
4474                  * Search the list for a matching client.
4475                  */
4476                 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4477                         if (tsp->len == sp->nst_len &&
4478                             !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4479                                 break;
4480                 }
4481                 if (sp == LIST_END(&sf->nsf_head)) {
4482                         sp = (struct nfsrv_stable *)malloc(tsp->len +
4483                                 sizeof (struct nfsrv_stable) - 1, M_TEMP,
4484                                 M_WAITOK);
4485                         NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4486                                 sizeof (struct nfst_rec) + tsp->len - 1);
4487                         LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4488                 } else {
4489                         if (tsp->flag == NFSNST_REVOKE)
4490                                 sp->nst_flag |= NFSNST_REVOKE;
4491                         else
4492                                 /*
4493                                  * A subsequent timestamp indicates the client
4494                                  * did a setclientid/confirm and any previous
4495                                  * revoke is no longer relevant.
4496                                  */
4497                                 sp->nst_flag &= ~NFSNST_REVOKE;
4498                 }
4499             }
4500         } while (len > 0);
4501         free((caddr_t)tsp, M_TEMP);
4502         sf->nsf_flags = NFSNSF_OK;
4503         sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4504                 NFSRV_LEASEDELTA;
4505 }
4506
4507 /*
4508  * Update the stable storage file, now that the grace period is over.
4509  */
4510 APPLESTATIC void
4511 nfsrv_updatestable(NFSPROC_T *p)
4512 {
4513         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4514         struct nfsrv_stable *sp, *nsp;
4515         int i;
4516         struct nfsvattr nva;
4517         vnode_t vp;
4518 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4519         mount_t mp = NULL;
4520 #endif
4521         int error;
4522
4523         if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4524                 return;
4525         sf->nsf_flags |= NFSNSF_UPDATEDONE;
4526         /*
4527          * Ok, we need to rewrite the stable storage file.
4528          * - truncate to 0 length
4529          * - write the new first structure
4530          * - loop through the data structures, writing out any that
4531          *   have timestamps older than the old boot
4532          */
4533         if (sf->nsf_bootvals) {
4534                 sf->nsf_numboots++;
4535                 for (i = sf->nsf_numboots - 2; i >= 0; i--)
4536                         sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4537         } else {
4538                 sf->nsf_numboots = 1;
4539                 sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4540                         M_TEMP, M_WAITOK);
4541         }
4542         sf->nsf_bootvals[0] = nfsrvboottime;
4543         sf->nsf_lease = nfsrv_lease;
4544         NFSVNO_ATTRINIT(&nva);
4545         NFSVNO_SETATTRVAL(&nva, size, 0);
4546         vp = NFSFPVNODE(sf->nsf_fp);
4547         vn_start_write(vp, &mp, V_WAIT);
4548         if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4549                 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4550                     NULL);
4551                 NFSVOPUNLOCK(vp, 0);
4552         } else
4553                 error = EPERM;
4554         vn_finished_write(mp);
4555         if (!error)
4556             error = NFSD_RDWR(UIO_WRITE, vp,
4557                 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4558                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4559         if (!error)
4560             error = NFSD_RDWR(UIO_WRITE, vp,
4561                 (caddr_t)sf->nsf_bootvals,
4562                 sf->nsf_numboots * sizeof (time_t),
4563                 (off_t)(sizeof (struct nfsf_rec)),
4564                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4565         free((caddr_t)sf->nsf_bootvals, M_TEMP);
4566         sf->nsf_bootvals = NULL;
4567         if (error) {
4568                 sf->nsf_flags &= ~NFSNSF_OK;
4569                 printf("EEK! Can't write NfsV4 stable storage file\n");
4570                 return;
4571         }
4572         sf->nsf_flags |= NFSNSF_OK;
4573
4574         /*
4575          * Loop through the list and write out timestamp records for
4576          * any clients that successfully reclaimed state.
4577          */
4578         LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4579                 if (sp->nst_flag & NFSNST_GOTSTATE) {
4580                         nfsrv_writestable(sp->nst_client, sp->nst_len,
4581                                 NFSNST_NEWSTATE, p);
4582                         sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4583                 }
4584                 LIST_REMOVE(sp, nst_list);
4585                 free((caddr_t)sp, M_TEMP);
4586         }
4587         nfsrv_backupstable();
4588 }
4589
4590 /*
4591  * Append a record to the stable storage file.
4592  */
4593 APPLESTATIC void
4594 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4595 {
4596         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4597         struct nfst_rec *sp;
4598         int error;
4599
4600         if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4601                 return;
4602         sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4603                 len - 1, M_TEMP, M_WAITOK);
4604         sp->len = len;
4605         NFSBCOPY(client, sp->client, len);
4606         sp->flag = flag;
4607         error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4608             (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4609             UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4610         free((caddr_t)sp, M_TEMP);
4611         if (error) {
4612                 sf->nsf_flags &= ~NFSNSF_OK;
4613                 printf("EEK! Can't write NfsV4 stable storage file\n");
4614         }
4615 }
4616
4617 /*
4618  * This function is called during the grace period to mark a client
4619  * that successfully reclaimed state.
4620  */
4621 static void
4622 nfsrv_markstable(struct nfsclient *clp)
4623 {
4624         struct nfsrv_stable *sp;
4625
4626         /*
4627          * First find the client structure.
4628          */
4629         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4630                 if (sp->nst_len == clp->lc_idlen &&
4631                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4632                         break;
4633         }
4634         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4635                 return;
4636
4637         /*
4638          * Now, just mark it and set the nfsclient back pointer.
4639          */
4640         sp->nst_flag |= NFSNST_GOTSTATE;
4641         sp->nst_clp = clp;
4642 }
4643
4644 /*
4645  * This function is called for a reclaim, to see if it gets grace.
4646  * It returns 0 if a reclaim is allowed, 1 otherwise.
4647  */
4648 static int
4649 nfsrv_checkstable(struct nfsclient *clp)
4650 {
4651         struct nfsrv_stable *sp;
4652
4653         /*
4654          * First, find the entry for the client.
4655          */
4656         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4657                 if (sp->nst_len == clp->lc_idlen &&
4658                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4659                         break;
4660         }
4661
4662         /*
4663          * If not in the list, state was revoked or no state was issued
4664          * since the previous reboot, a reclaim is denied.
4665          */
4666         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4667             (sp->nst_flag & NFSNST_REVOKE) ||
4668             !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4669                 return (1);
4670         return (0);
4671 }
4672
4673 /*
4674  * Test for and try to clear out a conflicting client. This is called by
4675  * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4676  * a found.
4677  * The trick here is that it can't revoke a conflicting client with an
4678  * expired lease unless it holds the v4root lock, so...
4679  * If no v4root lock, get the lock and return 1 to indicate "try again".
4680  * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4681  * the revocation worked and the conflicting client is "bye, bye", so it
4682  * can be tried again.
4683  * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4684  * Unlocks State before a non-zero value is returned.
4685  */
4686 static int
4687 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4688     NFSPROC_T *p)
4689 {
4690         int gotlock, lktype = 0;
4691
4692         /*
4693          * If lease hasn't expired, we can't fix it.
4694          */
4695         if (clp->lc_expiry >= NFSD_MONOSEC ||
4696             !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4697                 return (0);
4698         if (*haslockp == 0) {
4699                 NFSUNLOCKSTATE();
4700                 if (vp != NULL) {
4701                         lktype = NFSVOPISLOCKED(vp);
4702                         NFSVOPUNLOCK(vp, 0);
4703                 }
4704                 NFSLOCKV4ROOTMUTEX();
4705                 nfsv4_relref(&nfsv4rootfs_lock);
4706                 do {
4707                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4708                             NFSV4ROOTLOCKMUTEXPTR, NULL);
4709                 } while (!gotlock);
4710                 NFSUNLOCKV4ROOTMUTEX();
4711                 *haslockp = 1;
4712                 if (vp != NULL) {
4713                         NFSVOPLOCK(vp, lktype | LK_RETRY);
4714                         if ((vp->v_iflag & VI_DOOMED) != 0)
4715                                 return (2);
4716                 }
4717                 return (1);
4718         }
4719         NFSUNLOCKSTATE();
4720
4721         /*
4722          * Ok, we can expire the conflicting client.
4723          */
4724         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4725         nfsrv_backupstable();
4726         nfsrv_cleanclient(clp, p);
4727         nfsrv_freedeleglist(&clp->lc_deleg);
4728         nfsrv_freedeleglist(&clp->lc_olddeleg);
4729         LIST_REMOVE(clp, lc_hash);
4730         nfsrv_zapclient(clp, p);
4731         return (1);
4732 }
4733
4734 /*
4735  * Resolve a delegation conflict.
4736  * Returns 0 to indicate the conflict was resolved without sleeping.
4737  * Return -1 to indicate that the caller should check for conflicts again.
4738  * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4739  *
4740  * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4741  * for a return of 0, since there was no sleep and it could be required
4742  * later. It is released for a return of NFSERR_DELAY, since the caller
4743  * will return that error. It is released when a sleep was done waiting
4744  * for the delegation to be returned or expire (so that other nfsds can
4745  * handle ops). Then, it must be acquired for the write to stable storage.
4746  * (This function is somewhat similar to nfsrv_clientconflict(), but
4747  *  the semantics differ in a couple of subtle ways. The return of 0
4748  *  indicates the conflict was resolved without sleeping here, not
4749  *  that the conflict can't be resolved and the handling of nfsv4root_lock
4750  *  differs, as noted above.)
4751  * Unlocks State before returning a non-zero value.
4752  */
4753 static int
4754 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4755     vnode_t vp)
4756 {
4757         struct nfsclient *clp = stp->ls_clp;
4758         int gotlock, error, lktype = 0, retrycnt, zapped_clp;
4759         nfsv4stateid_t tstateid;
4760         fhandle_t tfh;
4761
4762         /*
4763          * If the conflict is with an old delegation...
4764          */
4765         if (stp->ls_flags & NFSLCK_OLDDELEG) {
4766                 /*
4767                  * You can delete it, if it has expired.
4768                  */
4769                 if (clp->lc_delegtime < NFSD_MONOSEC) {
4770                         nfsrv_freedeleg(stp);
4771                         NFSUNLOCKSTATE();
4772                         error = -1;
4773                         goto out;
4774                 }
4775                 NFSUNLOCKSTATE();
4776                 /*
4777                  * During this delay, the old delegation could expire or it
4778                  * could be recovered by the client via an Open with
4779                  * CLAIM_DELEGATE_PREV.
4780                  * Release the nfsv4root_lock, if held.
4781                  */
4782                 if (*haslockp) {
4783                         *haslockp = 0;
4784                         NFSLOCKV4ROOTMUTEX();
4785                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
4786                         NFSUNLOCKV4ROOTMUTEX();
4787                 }
4788                 error = NFSERR_DELAY;
4789                 goto out;
4790         }
4791
4792         /*
4793          * It's a current delegation, so:
4794          * - check to see if the delegation has expired
4795          *   - if so, get the v4root lock and then expire it
4796          */
4797         if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4798                 /*
4799                  * - do a recall callback, since not yet done
4800                  * For now, never allow truncate to be set. To use
4801                  * truncate safely, it must be guaranteed that the
4802                  * Remove, Rename or Setattr with size of 0 will
4803                  * succeed and that would require major changes to
4804                  * the VFS/Vnode OPs.
4805                  * Set the expiry time large enough so that it won't expire
4806                  * until after the callback, then set it correctly, once
4807                  * the callback is done. (The delegation will now time
4808                  * out whether or not the Recall worked ok. The timeout
4809                  * will be extended when ops are done on the delegation
4810                  * stateid, up to the timelimit.)
4811                  */
4812                 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4813                     NFSRV_LEASEDELTA;
4814                 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4815                     NFSRV_LEASEDELTA;
4816                 stp->ls_flags |= NFSLCK_DELEGRECALL;
4817
4818                 /*
4819                  * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4820                  * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4821                  * in order to try and avoid a race that could happen
4822                  * when a CBRecall request passed the Open reply with
4823                  * the delegation in it when transitting the network.
4824                  * Since nfsrv_docallback will sleep, don't use stp after
4825                  * the call.
4826                  */
4827                 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4828                     sizeof (tstateid));
4829                 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4830                     sizeof (tfh));
4831                 NFSUNLOCKSTATE();
4832                 if (*haslockp) {
4833                         *haslockp = 0;
4834                         NFSLOCKV4ROOTMUTEX();
4835                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
4836                         NFSUNLOCKV4ROOTMUTEX();
4837                 }
4838                 retrycnt = 0;
4839                 do {
4840                     error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4841                         &tstateid, 0, &tfh, NULL, NULL, p);
4842                     retrycnt++;
4843                 } while ((error == NFSERR_BADSTATEID ||
4844                     error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4845                 error = NFSERR_DELAY;
4846                 goto out;
4847         }
4848
4849         if (clp->lc_expiry >= NFSD_MONOSEC &&
4850             stp->ls_delegtime >= NFSD_MONOSEC) {
4851                 NFSUNLOCKSTATE();
4852                 /*
4853                  * A recall has been done, but it has not yet expired.
4854                  * So, RETURN_DELAY.
4855                  */
4856                 if (*haslockp) {
4857                         *haslockp = 0;
4858                         NFSLOCKV4ROOTMUTEX();
4859                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
4860                         NFSUNLOCKV4ROOTMUTEX();
4861                 }
4862                 error = NFSERR_DELAY;
4863                 goto out;
4864         }
4865
4866         /*
4867          * If we don't yet have the lock, just get it and then return,
4868          * since we need that before deleting expired state, such as
4869          * this delegation.
4870          * When getting the lock, unlock the vnode, so other nfsds that
4871          * are in progress, won't get stuck waiting for the vnode lock.
4872          */
4873         if (*haslockp == 0) {
4874                 NFSUNLOCKSTATE();
4875                 if (vp != NULL) {
4876                         lktype = NFSVOPISLOCKED(vp);
4877                         NFSVOPUNLOCK(vp, 0);
4878                 }
4879                 NFSLOCKV4ROOTMUTEX();
4880                 nfsv4_relref(&nfsv4rootfs_lock);
4881                 do {
4882                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4883                             NFSV4ROOTLOCKMUTEXPTR, NULL);
4884                 } while (!gotlock);
4885                 NFSUNLOCKV4ROOTMUTEX();
4886                 *haslockp = 1;
4887                 if (vp != NULL) {
4888                         NFSVOPLOCK(vp, lktype | LK_RETRY);
4889                         if ((vp->v_iflag & VI_DOOMED) != 0) {
4890                                 *haslockp = 0;
4891                                 NFSLOCKV4ROOTMUTEX();
4892                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
4893                                 NFSUNLOCKV4ROOTMUTEX();
4894                                 error = NFSERR_PERM;
4895                                 goto out;
4896                         }
4897                 }
4898                 error = -1;
4899                 goto out;
4900         }
4901
4902         NFSUNLOCKSTATE();
4903         /*
4904          * Ok, we can delete the expired delegation.
4905          * First, write the Revoke record to stable storage and then
4906          * clear out the conflict.
4907          * Since all other nfsd threads are now blocked, we can safely
4908          * sleep without the state changing.
4909          */
4910         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4911         nfsrv_backupstable();
4912         if (clp->lc_expiry < NFSD_MONOSEC) {
4913                 nfsrv_cleanclient(clp, p);
4914                 nfsrv_freedeleglist(&clp->lc_deleg);
4915                 nfsrv_freedeleglist(&clp->lc_olddeleg);
4916                 LIST_REMOVE(clp, lc_hash);
4917                 zapped_clp = 1;
4918         } else {
4919                 nfsrv_freedeleg(stp);
4920                 zapped_clp = 0;
4921         }
4922         if (zapped_clp)
4923                 nfsrv_zapclient(clp, p);
4924         error = -1;
4925
4926 out:
4927         NFSEXITCODE(error);
4928         return (error);
4929 }
4930
4931 /*
4932  * Check for a remove allowed, if remove is set to 1 and get rid of
4933  * delegations.
4934  */
4935 APPLESTATIC int
4936 nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4937 {
4938         struct nfsstate *stp;
4939         struct nfslockfile *lfp;
4940         int error, haslock = 0;
4941         fhandle_t nfh;
4942
4943         /*
4944          * First, get the lock file structure.
4945          * (A return of -1 means no associated state, so remove ok.)
4946          */
4947         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4948 tryagain:
4949         NFSLOCKSTATE();
4950         if (!error)
4951                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4952         if (error) {
4953                 NFSUNLOCKSTATE();
4954                 if (haslock) {
4955                         NFSLOCKV4ROOTMUTEX();
4956                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
4957                         NFSUNLOCKV4ROOTMUTEX();
4958                 }
4959                 if (error == -1)
4960                         error = 0;
4961                 goto out;
4962         }
4963
4964         /*
4965          * Now, we must Recall any delegations.
4966          */
4967         error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
4968         if (error) {
4969                 /*
4970                  * nfsrv_cleandeleg() unlocks state for non-zero
4971                  * return.
4972                  */
4973                 if (error == -1)
4974                         goto tryagain;
4975                 if (haslock) {
4976                         NFSLOCKV4ROOTMUTEX();
4977                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
4978                         NFSUNLOCKV4ROOTMUTEX();
4979                 }
4980                 goto out;
4981         }
4982
4983         /*
4984          * Now, look for a conflicting open share.
4985          */
4986         if (remove) {
4987                 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
4988                         if (stp->ls_flags & NFSLCK_WRITEDENY) {
4989                                 error = NFSERR_FILEOPEN;
4990                                 break;
4991                         }
4992                 }
4993         }
4994
4995         NFSUNLOCKSTATE();
4996         if (haslock) {
4997                 NFSLOCKV4ROOTMUTEX();
4998                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
4999                 NFSUNLOCKV4ROOTMUTEX();
5000         }
5001
5002 out:
5003         NFSEXITCODE(error);
5004         return (error);
5005 }
5006
5007 /*
5008  * Clear out all delegations for the file referred to by lfp.
5009  * May return NFSERR_DELAY, if there will be a delay waiting for
5010  * delegations to expire.
5011  * Returns -1 to indicate it slept while recalling a delegation.
5012  * This function has the side effect of deleting the nfslockfile structure,
5013  * if it no longer has associated state and didn't have to sleep.
5014  * Unlocks State before a non-zero value is returned.
5015  */
5016 static int
5017 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5018     struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5019 {
5020         struct nfsstate *stp, *nstp;
5021         int ret = 0;
5022
5023         stp = LIST_FIRST(&lfp->lf_deleg);
5024         while (stp != LIST_END(&lfp->lf_deleg)) {
5025                 nstp = LIST_NEXT(stp, ls_file);
5026                 if (stp->ls_clp != clp) {
5027                         ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5028                         if (ret) {
5029                                 /*
5030                                  * nfsrv_delegconflict() unlocks state
5031                                  * when it returns non-zero.
5032                                  */
5033                                 goto out;
5034                         }
5035                 }
5036                 stp = nstp;
5037         }
5038 out:
5039         NFSEXITCODE(ret);
5040         return (ret);
5041 }
5042
5043 /*
5044  * There are certain operations that, when being done outside of NFSv4,
5045  * require that any NFSv4 delegation for the file be recalled.
5046  * This function is to be called for those cases:
5047  * VOP_RENAME() - When a delegation is being recalled for any reason,
5048  *      the client may have to do Opens against the server, using the file's
5049  *      final component name. If the file has been renamed on the server,
5050  *      that component name will be incorrect and the Open will fail.
5051  * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5052  *      been removed on the server, if there is a delegation issued to
5053  *      that client for the file. I say "theoretically" since clients
5054  *      normally do an Access Op before the Open and that Access Op will
5055  *      fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5056  *      they will detect the file's removal in the same manner. (There is
5057  *      one case where RFC3530 allows a client to do an Open without first
5058  *      doing an Access Op, which is passage of a check against the ACE
5059  *      returned with a Write delegation, but current practice is to ignore
5060  *      the ACE and always do an Access Op.)
5061  *      Since the functions can only be called with an unlocked vnode, this
5062  *      can't be done at this time.
5063  * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5064  *      locks locally in the client, which are not visible to the server. To
5065  *      deal with this, issuing of delegations for a vnode must be disabled
5066  *      and all delegations for the vnode recalled. This is done via the
5067  *      second function, using the VV_DISABLEDELEG vflag on the vnode.
5068  */
5069 APPLESTATIC void
5070 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5071 {
5072         time_t starttime;
5073         int error;
5074
5075         /*
5076          * First, check to see if the server is currently running and it has
5077          * been called for a regular file when issuing delegations.
5078          */
5079         if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5080             nfsrv_issuedelegs == 0)
5081                 return;
5082
5083         KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5084         /*
5085          * First, get a reference on the nfsv4rootfs_lock so that an
5086          * exclusive lock cannot be acquired by another thread.
5087          */
5088         NFSLOCKV4ROOTMUTEX();
5089         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5090         NFSUNLOCKV4ROOTMUTEX();
5091
5092         /*
5093          * Now, call nfsrv_checkremove() in a loop while it returns
5094          * NFSERR_DELAY. Return upon any other error or when timed out.
5095          */
5096         starttime = NFSD_MONOSEC;
5097         do {
5098                 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5099                         error = nfsrv_checkremove(vp, 0, p);
5100                         NFSVOPUNLOCK(vp, 0);
5101                 } else
5102                         error = EPERM;
5103                 if (error == NFSERR_DELAY) {
5104                         if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5105                                 break;
5106                         /* Sleep for a short period of time */
5107                         (void) nfs_catnap(PZERO, 0, "nfsremove");
5108                 }
5109         } while (error == NFSERR_DELAY);
5110         NFSLOCKV4ROOTMUTEX();
5111         nfsv4_relref(&nfsv4rootfs_lock);
5112         NFSUNLOCKV4ROOTMUTEX();
5113 }
5114
5115 APPLESTATIC void
5116 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5117 {
5118
5119 #ifdef VV_DISABLEDELEG
5120         /*
5121          * First, flag issuance of delegations disabled.
5122          */
5123         atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5124 #endif
5125
5126         /*
5127          * Then call nfsd_recalldelegation() to get rid of all extant
5128          * delegations.
5129          */
5130         nfsd_recalldelegation(vp, p);
5131 }
5132
5133 /*
5134  * Check for conflicting locks, etc. and then get rid of delegations.
5135  * (At one point I thought that I should get rid of delegations for any
5136  *  Setattr, since it could potentially disallow the I/O op (read or write)
5137  *  allowed by the delegation. However, Setattr Ops that aren't changing
5138  *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5139  *  for the same client or a different one, so I decided to only get rid
5140  *  of delegations for other clients when the size is being changed.)
5141  * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5142  * as Write backs, even if there is no delegation, so it really isn't any
5143  * different?)
5144  */
5145 APPLESTATIC int
5146 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5147     nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5148     struct nfsexstuff *exp, NFSPROC_T *p)
5149 {
5150         struct nfsstate st, *stp = &st;
5151         struct nfslock lo, *lop = &lo;
5152         int error = 0;
5153         nfsquad_t clientid;
5154
5155         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5156                 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5157                 lop->lo_first = nvap->na_size;
5158         } else {
5159                 stp->ls_flags = 0;
5160                 lop->lo_first = 0;
5161         }
5162         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5163             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5164             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5165             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5166                 stp->ls_flags |= NFSLCK_SETATTR;
5167         if (stp->ls_flags == 0)
5168                 goto out;
5169         lop->lo_end = NFS64BITSSET;
5170         lop->lo_flags = NFSLCK_WRITE;
5171         stp->ls_ownerlen = 0;
5172         stp->ls_op = NULL;
5173         stp->ls_uid = nd->nd_cred->cr_uid;
5174         stp->ls_stateid.seqid = stateidp->seqid;
5175         clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5176         clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5177         stp->ls_stateid.other[2] = stateidp->other[2];
5178         error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5179             stateidp, exp, nd, p);
5180
5181 out:
5182         NFSEXITCODE2(error, nd);
5183         return (error);
5184 }
5185
5186 /*
5187  * Check for a write delegation and do a CBGETATTR if there is one, updating
5188  * the attributes, as required.
5189  * Should I return an error if I can't get the attributes? (For now, I'll
5190  * just return ok.
5191  */
5192 APPLESTATIC int
5193 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5194     struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5195     NFSPROC_T *p)
5196 {
5197         struct nfsstate *stp;
5198         struct nfslockfile *lfp;
5199         struct nfsclient *clp;
5200         struct nfsvattr nva;
5201         fhandle_t nfh;
5202         int error = 0;
5203         nfsattrbit_t cbbits;
5204         u_quad_t delegfilerev;
5205
5206         NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5207         if (!NFSNONZERO_ATTRBIT(&cbbits))
5208                 goto out;
5209
5210         /*
5211          * Get the lock file structure.
5212          * (A return of -1 means no associated state, so return ok.)
5213          */
5214         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5215         NFSLOCKSTATE();
5216         if (!error)
5217                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5218         if (error) {
5219                 NFSUNLOCKSTATE();
5220                 if (error == -1)
5221                         error = 0;
5222                 goto out;
5223         }
5224
5225         /*
5226          * Now, look for a write delegation.
5227          */
5228         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5229                 if (stp->ls_flags & NFSLCK_DELEGWRITE)
5230                         break;
5231         }
5232         if (stp == LIST_END(&lfp->lf_deleg)) {
5233                 NFSUNLOCKSTATE();
5234                 goto out;
5235         }
5236         clp = stp->ls_clp;
5237         delegfilerev = stp->ls_filerev;
5238
5239         /*
5240          * If the Write delegation was issued as a part of this Compound RPC
5241          * or if we have an Implied Clientid (used in a previous Op in this
5242          * compound) and it is the client the delegation was issued to,
5243          * just return ok.
5244          * I also assume that it is from the same client iff the network
5245          * host IP address is the same as the callback address. (Not
5246          * exactly correct by the RFC, but avoids a lot of Getattr
5247          * callbacks.)
5248          */
5249         if (nd->nd_compref == stp->ls_compref ||
5250             ((nd->nd_flag & ND_IMPLIEDCLID) &&
5251              clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5252              nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5253                 NFSUNLOCKSTATE();
5254                 goto out;
5255         }
5256
5257         /*
5258          * We are now done with the delegation state structure,
5259          * so the statelock can be released and we can now tsleep().
5260          */
5261
5262         /*
5263          * Now, we must do the CB Getattr callback, to see if Change or Size
5264          * has changed.
5265          */
5266         if (clp->lc_expiry >= NFSD_MONOSEC) {
5267                 NFSUNLOCKSTATE();
5268                 NFSVNO_ATTRINIT(&nva);
5269                 nva.na_filerev = NFS64BITSSET;
5270                 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5271                     0, &nfh, &nva, &cbbits, p);
5272                 if (!error) {
5273                         if ((nva.na_filerev != NFS64BITSSET &&
5274                             nva.na_filerev > delegfilerev) ||
5275                             (NFSVNO_ISSETSIZE(&nva) &&
5276                              nva.na_size != nvap->na_size)) {
5277                                 error = nfsvno_updfilerev(vp, nvap, cred, p);
5278                                 if (NFSVNO_ISSETSIZE(&nva))
5279                                         nvap->na_size = nva.na_size;
5280                         }
5281                 } else
5282                         error = 0;      /* Ignore callback errors for now. */
5283         } else {
5284                 NFSUNLOCKSTATE();
5285         }
5286
5287 out:
5288         NFSEXITCODE2(error, nd);
5289         return (error);
5290 }
5291
5292 /*
5293  * This function looks for openowners that haven't had any opens for
5294  * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5295  * is set.
5296  */
5297 APPLESTATIC void
5298 nfsrv_throwawayopens(NFSPROC_T *p)
5299 {
5300         struct nfsclient *clp, *nclp;
5301         struct nfsstate *stp, *nstp;
5302         int i;
5303
5304         NFSLOCKSTATE();
5305         nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5306         /*
5307          * For each client...
5308          */
5309         for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
5310             LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5311                 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5312                         if (LIST_EMPTY(&stp->ls_open) &&
5313                             (stp->ls_noopens > NFSNOOPEN ||
5314                              (nfsrv_openpluslock * 2) >
5315                              NFSRV_V4STATELIMIT))
5316                                 nfsrv_freeopenowner(stp, 0, p);
5317                 }
5318             }
5319         }
5320         NFSUNLOCKSTATE();
5321 }
5322
5323 /*
5324  * This function checks to see if the credentials are the same.
5325  * Returns 1 for not same, 0 otherwise.
5326  */
5327 static int
5328 nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5329 {
5330
5331         if (nd->nd_flag & ND_GSS) {
5332                 if (!(clp->lc_flags & LCL_GSS))
5333                         return (1);
5334                 if (clp->lc_flags & LCL_NAME) {
5335                         if (nd->nd_princlen != clp->lc_namelen ||
5336                             NFSBCMP(nd->nd_principal, clp->lc_name,
5337                                 clp->lc_namelen))
5338                                 return (1);
5339                         else
5340                                 return (0);
5341                 }
5342                 if (nd->nd_cred->cr_uid == clp->lc_uid)
5343                         return (0);
5344                 else
5345                         return (1);
5346         } else if (clp->lc_flags & LCL_GSS)
5347                 return (1);
5348         /*
5349          * For AUTH_SYS, allow the same uid or root. (This is underspecified
5350          * in RFC3530, which talks about principals, but doesn't say anything
5351          * about uids for AUTH_SYS.)
5352          */
5353         if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5354                 return (0);
5355         else
5356                 return (1);
5357 }
5358
5359 /*
5360  * Calculate the lease expiry time.
5361  */
5362 static time_t
5363 nfsrv_leaseexpiry(void)
5364 {
5365
5366         if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5367                 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5368         return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5369 }
5370
5371 /*
5372  * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5373  */
5374 static void
5375 nfsrv_delaydelegtimeout(struct nfsstate *stp)
5376 {
5377
5378         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5379                 return;
5380
5381         if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5382             stp->ls_delegtime < stp->ls_delegtimelimit) {
5383                 stp->ls_delegtime += nfsrv_lease;
5384                 if (stp->ls_delegtime > stp->ls_delegtimelimit)
5385                         stp->ls_delegtime = stp->ls_delegtimelimit;
5386         }
5387 }
5388
5389 /*
5390  * This function checks to see if there is any other state associated
5391  * with the openowner for this Open.
5392  * It returns 1 if there is no other state, 0 otherwise.
5393  */
5394 static int
5395 nfsrv_nootherstate(struct nfsstate *stp)
5396 {
5397         struct nfsstate *tstp;
5398
5399         LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5400                 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5401                         return (0);
5402         }
5403         return (1);
5404 }
5405
5406 /*
5407  * Create a list of lock deltas (changes to local byte range locking
5408  * that can be rolled back using the list) and apply the changes via
5409  * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5410  * the rollback or update function will be called after this.
5411  * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5412  * call fails. If it returns an error, it will unlock the list.
5413  */
5414 static int
5415 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5416     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5417 {
5418         struct nfslock *lop, *nlop;
5419         int error = 0;
5420
5421         /* Loop through the list of locks. */
5422         lop = LIST_FIRST(&lfp->lf_locallock);
5423         while (first < end && lop != NULL) {
5424                 nlop = LIST_NEXT(lop, lo_lckowner);
5425                 if (first >= lop->lo_end) {
5426                         /* not there yet */
5427                         lop = nlop;
5428                 } else if (first < lop->lo_first) {
5429                         /* new one starts before entry in list */
5430                         if (end <= lop->lo_first) {
5431                                 /* no overlap between old and new */
5432                                 error = nfsrv_dolocal(vp, lfp, flags,
5433                                     NFSLCK_UNLOCK, first, end, cfp, p);
5434                                 if (error != 0)
5435                                         break;
5436                                 first = end;
5437                         } else {
5438                                 /* handle fragment overlapped with new one */
5439                                 error = nfsrv_dolocal(vp, lfp, flags,
5440                                     NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5441                                     p);
5442                                 if (error != 0)
5443                                         break;
5444                                 first = lop->lo_first;
5445                         }
5446                 } else {
5447                         /* new one overlaps this entry in list */
5448                         if (end <= lop->lo_end) {
5449                                 /* overlaps all of new one */
5450                                 error = nfsrv_dolocal(vp, lfp, flags,
5451                                     lop->lo_flags, first, end, cfp, p);
5452                                 if (error != 0)
5453                                         break;
5454                                 first = end;
5455                         } else {
5456                                 /* handle fragment overlapped with new one */
5457                                 error = nfsrv_dolocal(vp, lfp, flags,
5458                                     lop->lo_flags, first, lop->lo_end, cfp, p);
5459                                 if (error != 0)
5460                                         break;
5461                                 first = lop->lo_end;
5462                                 lop = nlop;
5463                         }
5464                 }
5465         }
5466         if (first < end && error == 0)
5467                 /* handle fragment past end of list */
5468                 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5469                     end, cfp, p);
5470
5471         NFSEXITCODE(error);
5472         return (error);
5473 }
5474
5475 /*
5476  * Local lock unlock. Unlock all byte ranges that are no longer locked
5477  * by NFSv4. To do this, unlock any subranges of first-->end that
5478  * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5479  * list. This list has all locks for the file held by other
5480  * <clientid, lockowner> tuples. The list is ordered by increasing
5481  * lo_first value, but may have entries that overlap each other, for
5482  * the case of read locks.
5483  */
5484 static void
5485 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5486     uint64_t init_end, NFSPROC_T *p)
5487 {
5488         struct nfslock *lop;
5489         uint64_t first, end, prevfirst;
5490
5491         first = init_first;
5492         end = init_end;
5493         while (first < init_end) {
5494                 /* Loop through all nfs locks, adjusting first and end */
5495                 prevfirst = 0;
5496                 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5497                         KASSERT(prevfirst <= lop->lo_first,
5498                             ("nfsv4 locks out of order"));
5499                         KASSERT(lop->lo_first < lop->lo_end,
5500                             ("nfsv4 bogus lock"));
5501                         prevfirst = lop->lo_first;
5502                         if (first >= lop->lo_first &&
5503                             first < lop->lo_end)
5504                                 /*
5505                                  * Overlaps with initial part, so trim
5506                                  * off that initial part by moving first past
5507                                  * it.
5508                                  */
5509                                 first = lop->lo_end;
5510                         else if (end > lop->lo_first &&
5511                             lop->lo_first > first) {
5512                                 /*
5513                                  * This lock defines the end of the
5514                                  * segment to unlock, so set end to the
5515                                  * start of it and break out of the loop.
5516                                  */
5517                                 end = lop->lo_first;
5518                                 break;
5519                         }
5520                         if (first >= end)
5521                                 /*
5522                                  * There is no segment left to do, so
5523                                  * break out of this loop and then exit
5524                                  * the outer while() since first will be set
5525                                  * to end, which must equal init_end here.
5526                                  */
5527                                 break;
5528                 }
5529                 if (first < end) {
5530                         /* Unlock this segment */
5531                         (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5532                             NFSLCK_READ, first, end, NULL, p);
5533                         nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5534                             first, end);
5535                 }
5536                 /*
5537                  * Now move past this segment and look for any further
5538                  * segment in the range, if there is one.
5539                  */
5540                 first = end;
5541                 end = init_end;
5542         }
5543 }
5544
5545 /*
5546  * Do the local lock operation and update the rollback list, as required.
5547  * Perform the rollback and return the error if nfsvno_advlock() fails.
5548  */
5549 static int
5550 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5551     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5552 {
5553         struct nfsrollback *rlp;
5554         int error = 0, ltype, oldltype;
5555
5556         if (flags & NFSLCK_WRITE)
5557                 ltype = F_WRLCK;
5558         else if (flags & NFSLCK_READ)
5559                 ltype = F_RDLCK;
5560         else
5561                 ltype = F_UNLCK;
5562         if (oldflags & NFSLCK_WRITE)
5563                 oldltype = F_WRLCK;
5564         else if (oldflags & NFSLCK_READ)
5565                 oldltype = F_RDLCK;
5566         else
5567                 oldltype = F_UNLCK;
5568         if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5569                 /* nothing to do */
5570                 goto out;
5571         error = nfsvno_advlock(vp, ltype, first, end, p);
5572         if (error != 0) {
5573                 if (cfp != NULL) {
5574                         cfp->cl_clientid.lval[0] = 0;
5575                         cfp->cl_clientid.lval[1] = 0;
5576                         cfp->cl_first = 0;
5577                         cfp->cl_end = NFS64BITSSET;
5578                         cfp->cl_flags = NFSLCK_WRITE;
5579                         cfp->cl_ownerlen = 5;
5580                         NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5581                 }
5582                 nfsrv_locallock_rollback(vp, lfp, p);
5583         } else if (ltype != F_UNLCK) {
5584                 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5585                     M_WAITOK);
5586                 rlp->rlck_first = first;
5587                 rlp->rlck_end = end;
5588                 rlp->rlck_type = oldltype;
5589                 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5590         }
5591
5592 out:
5593         NFSEXITCODE(error);
5594         return (error);
5595 }
5596
5597 /*
5598  * Roll back local lock changes and free up the rollback list.
5599  */
5600 static void
5601 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5602 {
5603         struct nfsrollback *rlp, *nrlp;
5604
5605         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5606                 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5607                     rlp->rlck_end, p);
5608                 free(rlp, M_NFSDROLLBACK);
5609         }
5610         LIST_INIT(&lfp->lf_rollback);
5611 }
5612
5613 /*
5614  * Update local lock list and delete rollback list (ie now committed to the
5615  * local locks). Most of the work is done by the internal function.
5616  */
5617 static void
5618 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5619     uint64_t end)
5620 {
5621         struct nfsrollback *rlp, *nrlp;
5622         struct nfslock *new_lop, *other_lop;
5623
5624         new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5625         if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5626                 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5627                     M_WAITOK);
5628         else
5629                 other_lop = NULL;
5630         new_lop->lo_flags = flags;
5631         new_lop->lo_first = first;
5632         new_lop->lo_end = end;
5633         nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5634         if (new_lop != NULL)
5635                 free(new_lop, M_NFSDLOCK);
5636         if (other_lop != NULL)
5637                 free(other_lop, M_NFSDLOCK);
5638
5639         /* and get rid of the rollback list */
5640         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5641                 free(rlp, M_NFSDROLLBACK);
5642         LIST_INIT(&lfp->lf_rollback);
5643 }
5644
5645 /*
5646  * Lock the struct nfslockfile for local lock updating.
5647  */
5648 static void
5649 nfsrv_locklf(struct nfslockfile *lfp)
5650 {
5651         int gotlock;
5652
5653         /* lf_usecount ensures *lfp won't be free'd */
5654         lfp->lf_usecount++;
5655         do {
5656                 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5657                     NFSSTATEMUTEXPTR, NULL);
5658         } while (gotlock == 0);
5659         lfp->lf_usecount--;
5660 }
5661
5662 /*
5663  * Unlock the struct nfslockfile after local lock updating.
5664  */
5665 static void
5666 nfsrv_unlocklf(struct nfslockfile *lfp)
5667 {
5668
5669         nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5670 }
5671
5672 /*
5673  * Clear out all state for the NFSv4 server.
5674  * Must be called by a thread that can sleep when no nfsds are running.
5675  */
5676 void
5677 nfsrv_throwawayallstate(NFSPROC_T *p)
5678 {
5679         struct nfsclient *clp, *nclp;
5680         struct nfslockfile *lfp, *nlfp;
5681         int i;
5682
5683         /*
5684          * For each client, clean out the state and then free the structure.
5685          */
5686         for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
5687                 LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5688                         nfsrv_cleanclient(clp, p);
5689                         nfsrv_freedeleglist(&clp->lc_deleg);
5690                         nfsrv_freedeleglist(&clp->lc_olddeleg);
5691                         free(clp, M_NFSDCLIENT);
5692                 }
5693         }
5694
5695         /*
5696          * Also, free up any remaining lock file structures.
5697          */
5698         for (i = 0; i < NFSLOCKHASHSIZE; i++) {
5699                 LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5700                         printf("nfsd unload: fnd a lock file struct\n");
5701                         nfsrv_freenfslockfile(lfp);
5702                 }
5703         }
5704 }
5705
5706 /*
5707  * Check the sequence# for the session and slot provided as an argument.
5708  * Also, renew the lease if the session will return NFS_OK.
5709  */
5710 int
5711 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
5712     uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
5713     uint32_t *sflagsp, NFSPROC_T *p)
5714 {
5715         struct nfsdsession *sep;
5716         struct nfssessionhash *shp;
5717         int error;
5718         SVCXPRT *savxprt;
5719
5720         shp = NFSSESSIONHASH(nd->nd_sessionid);
5721         NFSLOCKSESSION(shp);
5722         sep = nfsrv_findsession(nd->nd_sessionid);
5723         if (sep == NULL) {
5724                 NFSUNLOCKSESSION(shp);
5725                 return (NFSERR_BADSESSION);
5726         }
5727         error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
5728             sep->sess_slots, NULL, NFSV4_SLOTS - 1);
5729         if (error != 0) {
5730                 NFSUNLOCKSESSION(shp);
5731                 return (error);
5732         }
5733         if (cache_this != 0)
5734                 nd->nd_flag |= ND_SAVEREPLY;
5735         /* Renew the lease. */
5736         sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
5737         nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
5738         nd->nd_flag |= ND_IMPLIEDCLID;
5739
5740         /*
5741          * If this session handles the backchannel, save the nd_xprt for this
5742          * RPC, since this is the one being used.
5743          */
5744         if (sep->sess_cbsess.nfsess_xprt != NULL &&
5745             (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
5746                 savxprt = sep->sess_cbsess.nfsess_xprt;
5747                 SVC_ACQUIRE(nd->nd_xprt);
5748                 nd->nd_xprt->xp_p2 = savxprt->xp_p2;
5749                 nd->nd_xprt->xp_idletimeout = 0;        /* Disable timeout. */
5750                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
5751                 SVC_RELEASE(savxprt);
5752         }
5753
5754         *sflagsp = 0;
5755         if (sep->sess_clp->lc_req.nr_client == NULL)
5756                 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
5757         NFSUNLOCKSESSION(shp);
5758         if (error == NFSERR_EXPIRED) {
5759                 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
5760                 error = 0;
5761         } else if (error == NFSERR_ADMINREVOKED) {
5762                 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
5763                 error = 0;
5764         }
5765         *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
5766         return (0);
5767 }
5768
5769 /*
5770  * Check/set reclaim complete for this session/clientid.
5771  */
5772 int
5773 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
5774 {
5775         struct nfsdsession *sep;
5776         struct nfssessionhash *shp;
5777         int error = 0;
5778
5779         shp = NFSSESSIONHASH(nd->nd_sessionid);
5780         NFSLOCKSTATE();
5781         NFSLOCKSESSION(shp);
5782         sep = nfsrv_findsession(nd->nd_sessionid);
5783         if (sep == NULL) {
5784                 NFSUNLOCKSESSION(shp);
5785                 NFSUNLOCKSTATE();
5786                 return (NFSERR_BADSESSION);
5787         }
5788
5789         /* Check to see if reclaim complete has already happened. */
5790         if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
5791                 error = NFSERR_COMPLETEALREADY;
5792         else
5793                 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
5794         NFSUNLOCKSESSION(shp);
5795         NFSUNLOCKSTATE();
5796         return (error);
5797 }
5798
5799 /*
5800  * Cache the reply in a session slot.
5801  */
5802 void
5803 nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
5804    struct mbuf **m)
5805 {
5806         struct nfsdsession *sep;
5807         struct nfssessionhash *shp;
5808
5809         shp = NFSSESSIONHASH(sessionid);
5810         NFSLOCKSESSION(shp);
5811         sep = nfsrv_findsession(sessionid);
5812         if (sep == NULL) {
5813                 NFSUNLOCKSESSION(shp);
5814                 printf("nfsrv_cache_session: no session\n");
5815                 m_freem(*m);
5816                 return;
5817         }
5818         nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
5819         NFSUNLOCKSESSION(shp);
5820 }
5821
5822 /*
5823  * Search for a session that matches the sessionid.
5824  */
5825 static struct nfsdsession *
5826 nfsrv_findsession(uint8_t *sessionid)
5827 {
5828         struct nfsdsession *sep;
5829         struct nfssessionhash *shp;
5830
5831         shp = NFSSESSIONHASH(sessionid);
5832         LIST_FOREACH(sep, &shp->list, sess_hash) {
5833                 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
5834                         break;
5835         }
5836         return (sep);
5837 }
5838
5839 /*
5840  * Destroy a session.
5841  */
5842 int
5843 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
5844 {
5845         int error, samesess;
5846
5847         samesess = 0;
5848         if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
5849                 samesess = 1;
5850                 if ((nd->nd_flag & ND_LASTOP) == 0)
5851                         return (NFSERR_BADSESSION);
5852         }
5853         error = nfsrv_freesession(NULL, sessionid);
5854         if (error == 0 && samesess != 0)
5855                 nd->nd_flag &= ~ND_HASSEQUENCE;
5856         return (error);
5857 }
5858
5859 /*
5860  * Free up a session structure.
5861  */
5862 static int
5863 nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
5864 {
5865         struct nfssessionhash *shp;
5866         int i;
5867
5868         if (sep == NULL) {
5869                 shp = NFSSESSIONHASH(sessionid);
5870                 NFSLOCKSESSION(shp);
5871                 sep = nfsrv_findsession(sessionid);
5872         } else {
5873                 shp = NFSSESSIONHASH(sep->sess_sessionid);
5874                 NFSLOCKSESSION(shp);
5875         }
5876         if (sep != NULL) {
5877                 NFSLOCKSTATE();
5878                 sep->sess_refcnt--;
5879                 if (sep->sess_refcnt > 0) {
5880                         NFSUNLOCKSTATE();
5881                         NFSUNLOCKSESSION(shp);
5882                         return (0);
5883                 }
5884                 LIST_REMOVE(sep, sess_hash);
5885                 LIST_REMOVE(sep, sess_list);
5886                 NFSUNLOCKSTATE();
5887         }
5888         NFSUNLOCKSESSION(shp);
5889         if (sep == NULL)
5890                 return (NFSERR_BADSESSION);
5891         for (i = 0; i < NFSV4_SLOTS; i++)
5892                 if (sep->sess_slots[i].nfssl_reply != NULL)
5893                         m_freem(sep->sess_slots[i].nfssl_reply);
5894         if (sep->sess_cbsess.nfsess_xprt != NULL)
5895                 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
5896         free(sep, M_NFSDSESSION);
5897         return (0);
5898 }
5899
5900 /*
5901  * Free a stateid.
5902  * RFC5661 says that it should fail when there are associated opens, locks
5903  * or delegations. Since stateids represent opens, I don't see how you can
5904  * free an open stateid (it will be free'd when closed), so this function
5905  * only works for lock stateids (freeing the lock_owner) or delegations.
5906  */
5907 int
5908 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5909     NFSPROC_T *p)
5910 {
5911         struct nfsclient *clp;
5912         struct nfsstate *stp;
5913         int error;
5914
5915         NFSLOCKSTATE();
5916         /*
5917          * Look up the stateid
5918          */
5919         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
5920             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
5921         if (error == 0) {
5922                 /* First, check for a delegation. */
5923                 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
5924                         if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
5925                             NFSX_STATEIDOTHER))
5926                                 break;
5927                 }
5928                 if (stp != NULL) {
5929                         nfsrv_freedeleg(stp);
5930                         NFSUNLOCKSTATE();
5931                         return (error);
5932                 }
5933         }
5934         /* Not a delegation, try for a lock_owner. */
5935         if (error == 0)
5936                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
5937         if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
5938             NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
5939                 /* Not a lock_owner stateid. */
5940                 error = NFSERR_LOCKSHELD;
5941         if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
5942                 error = NFSERR_LOCKSHELD;
5943         if (error == 0)
5944                 nfsrv_freelockowner(stp, NULL, 0, p);
5945         NFSUNLOCKSTATE();
5946         return (error);
5947 }
5948
5949 /*
5950  * Generate the xdr for an NFSv4.1 CBSequence Operation.
5951  */
5952 static int
5953 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
5954     int dont_replycache, struct nfsdsession **sepp)
5955 {
5956         struct nfsdsession *sep;
5957         uint32_t *tl, slotseq = 0;
5958         int maxslot, slotpos;
5959         uint8_t sessionid[NFSX_V4SESSIONID];
5960         int error;
5961
5962         error = nfsv4_getcbsession(clp, sepp);
5963         if (error != 0)
5964                 return (error);
5965         sep = *sepp;
5966         (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
5967             &slotseq, sessionid);
5968         KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
5969
5970         /* Build the Sequence arguments. */
5971         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
5972         bcopy(sessionid, tl, NFSX_V4SESSIONID);
5973         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5974         nd->nd_slotseq = tl;
5975         *tl++ = txdr_unsigned(slotseq);
5976         *tl++ = txdr_unsigned(slotpos);
5977         *tl++ = txdr_unsigned(maxslot);
5978         if (dont_replycache == 0)
5979                 *tl++ = newnfs_true;
5980         else
5981                 *tl++ = newnfs_false;
5982         *tl = 0;                        /* No referring call list, for now. */
5983         nd->nd_flag |= ND_HASSEQUENCE;
5984         return (0);
5985 }
5986
5987 /*
5988  * Get a session for the callback.
5989  */
5990 static int
5991 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
5992 {
5993         struct nfsdsession *sep;
5994
5995         NFSLOCKSTATE();
5996         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
5997                 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
5998                         break;
5999         }
6000         if (sep == NULL) {
6001                 NFSUNLOCKSTATE();
6002                 return (NFSERR_BADSESSION);
6003         }
6004         sep->sess_refcnt++;
6005         *sepp = sep;
6006         NFSUNLOCKSTATE();
6007         return (0);
6008 }
6009