]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clstate.c
nfscl: Fix a deadlock related to the NFSv4 clientID lock
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clstate.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82
83 #include <fs/nfs/nfsport.h>
84
85 /*
86  * Global variables
87  */
/*
 * The extern declarations refer to objects that are defined outside the
 * part of the file shown here; the remaining variables are defined here.
 * NOTE(review): NFSREQSPINLOCK and NFSCLSTATEMUTEX are macros whose
 * expansion is not visible here; NFSCLSTATEMUTEX presumably declares the
 * mutex taken by NFSLOCKCLSTATE() - confirm.
 */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;     /* Head of clientid list */
/* Initialized from the NFSCLDELEGHIGHWATER and NFSCLLAYOUTHIGHWATER constants. */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100
/*
 * nfscl_delegcnt is incremented by nfscl_deleg() each time a delegation
 * is linked into a client.
 * NOTE(review): nfscl_layoutcnt is presumably the equivalent count for
 * layouts - confirm against the layout code.
 */
101 static int nfscl_delegcnt = 0;
102 static int nfscl_layoutcnt = 0;
/*
 * Forward declarations of the file-local (static) functions, so that they
 * can be called before their definitions appear in this file.
 */
103 static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
104     u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
105 static void nfscl_clrelease(struct nfsclclient *);
106 static void nfscl_cleanclient(struct nfsclclient *);
107 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
108     struct ucred *, NFSPROC_T *);
109 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
110     struct nfsmount *, struct ucred *, NFSPROC_T *);
111 static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *);
112 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
113     struct nfscllock *, int);
114 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
115     struct nfscllock **, int);
116 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
117 static u_int32_t nfscl_nextcbident(void);
118 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
119 static struct nfsclclient *nfscl_getclnt(u_int32_t);
120 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
121 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
122     int);
123 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
124     int, struct nfsclrecalllayout **);
125 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
126 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
127     int);
128 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
129 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
130     u_int8_t *, struct nfscllock **);
131 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
132 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
133     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
134 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
135     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
136     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
137 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
138     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
139     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
140 static void nfscl_totalrecall(struct nfsclclient *);
141 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
142     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
143 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
144     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
145     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
146 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
147     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
148     struct ucred *, NFSPROC_T *);
149 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
150     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
151 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
152     bool);
153 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
154 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
155 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
156     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
157     vnode_t *);
158 static void nfscl_freeopenowner(struct nfsclowner *, int);
159 static void nfscl_cleandeleg(struct nfscldeleg *);
160 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
161     struct nfsmount *, NFSPROC_T *);
162 static void nfscl_emptylockowner(struct nfscllockowner *,
163     struct nfscllockownerfhhead *);
164 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
165     struct nfsclflayouthead *);
166 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
167     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
168 static int nfscl_seq(uint32_t, uint32_t);
169 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
170     struct ucred *, NFSPROC_T *);
171 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
172     struct ucred *, NFSPROC_T *);
173
/*
 * Error tables for the callback operations.  Each table is a list of
 * NFSERR_xxx values terminated by a 0 entry, and nfscl_cberrmap[] collects
 * pointers to the tables (three slots share nfscberr_null).
 * NOTE(review): presumably nfscl_cberrmap[] is indexed by callback
 * operation number and the first entry of each table is the value
 * nfscl_errmap() falls back to when an error is not listed - confirm
 * against nfscl_errmap(), which is not shown here.
 */
174 static short nfscberr_null[] = {
175         0,
176         0,
177 };
178
179 static short nfscberr_getattr[] = {
180         NFSERR_RESOURCE,
181         NFSERR_BADHANDLE,
182         NFSERR_BADXDR,
183         NFSERR_RESOURCE,
184         NFSERR_SERVERFAULT,
185         0,
186 };
187
188 static short nfscberr_recall[] = {
189         NFSERR_RESOURCE,
190         NFSERR_BADHANDLE,
191         NFSERR_BADSTATEID,
192         NFSERR_BADXDR,
193         NFSERR_RESOURCE,
194         NFSERR_SERVERFAULT,
195         0,
196 };
197
198 static short *nfscl_cberrmap[] = {
199         nfscberr_null,
200         nfscberr_null,
201         nfscberr_null,
202         nfscberr_getattr,
203         nfscberr_recall
204 };
205
/*
 * Address family for a client: AF_INET6 when NFSCLFLAGS_AFINET6 is set in
 * clp->nfsc_flags, otherwise AF_INET.
 */
206 #define NETFAMILY(clp) \
207                 (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET)
208
209 /*
210  * Called for an open operation.
211  * If the nfhp argument is NULL, just get an openowner.
212  */
213 int
214 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
215     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
216     struct nfsclopen **opp, int *newonep, int *retp, int lockit, bool firstref)
217 {
218         struct nfsclclient *clp;
219         struct nfsclowner *owp, *nowp;
220         struct nfsclopen *op = NULL, *nop = NULL;
221         struct nfscldeleg *dp;
222         struct nfsclownerhead *ohp;
223         u_int8_t own[NFSV4CL_LOCKNAMELEN];
224         int ret;
225
226         if (newonep != NULL)
227                 *newonep = 0;
228         if (opp != NULL)
229                 *opp = NULL;
230         if (owpp != NULL)
231                 *owpp = NULL;
232
233         /*
234          * Might need one or both of these, so MALLOC them now, to
235          * avoid a tsleep() in MALLOC later.
236          */
237         nowp = malloc(sizeof (struct nfsclowner),
238             M_NFSCLOWNER, M_WAITOK);
239         if (nfhp != NULL)
240             nop = malloc(sizeof (struct nfsclopen) +
241                 fhlen - 1, M_NFSCLOPEN, M_WAITOK);
242         ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, firstref, &clp);
243         if (ret != 0) {
244                 free(nowp, M_NFSCLOWNER);
245                 if (nop != NULL)
246                         free(nop, M_NFSCLOPEN);
247                 return (ret);
248         }
249
250         /*
251          * Get the Open iff it already exists.
252          * If none found, add the new one or return error, depending upon
253          * "create".
254          */
255         NFSLOCKCLSTATE();
256         dp = NULL;
257         /* First check the delegation list */
258         if (nfhp != NULL && usedeleg) {
259                 LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
260                         if (dp->nfsdl_fhlen == fhlen &&
261                             !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
262                                 if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
263                                     (dp->nfsdl_flags & NFSCLDL_WRITE))
264                                         break;
265                                 dp = NULL;
266                                 break;
267                         }
268                 }
269         }
270
271         if (dp != NULL) {
272                 nfscl_filllockowner(p->td_proc, own, F_POSIX);
273                 ohp = &dp->nfsdl_owner;
274         } else {
275                 /* For NFSv4.1 and this option, use a single open_owner. */
276                 if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
277                         nfscl_filllockowner(NULL, own, F_POSIX);
278                 else
279                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
280                 ohp = &clp->nfsc_owner;
281         }
282         /* Now, search for an openowner */
283         LIST_FOREACH(owp, ohp, nfsow_list) {
284                 if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
285                         break;
286         }
287
288         /*
289          * Create a new open, as required.
290          */
291         nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
292             cred, newonep);
293
294         /*
295          * Now, check the mode on the open and return the appropriate
296          * value.
297          */
298         if (retp != NULL) {
299                 if (nfhp != NULL && dp != NULL && nop == NULL)
300                         /* new local open on delegation */
301                         *retp = NFSCLOPEN_SETCRED;
302                 else
303                         *retp = NFSCLOPEN_OK;
304         }
305         if (op != NULL && (amode & ~(op->nfso_mode))) {
306                 op->nfso_mode |= amode;
307                 if (retp != NULL && dp == NULL)
308                         *retp = NFSCLOPEN_DOOPEN;
309         }
310
311         /*
312          * Serialize modifications to the open owner for multiple threads
313          * within the same process using a read/write sleep lock.
314          * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
315          * by acquiring a shared lock.  The close operations still use an
316          * exclusive lock for this case.
317          */
318         if (lockit != 0) {
319                 if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) {
320                         /*
321                          * Get a shared lock on the OpenOwner, but first
322                          * wait for any pending exclusive lock, so that the
323                          * exclusive locker gets priority.
324                          */
325                         nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
326                             NFSCLSTATEMUTEXPTR, NULL);
327                         nfsv4_getref(&owp->nfsow_rwlock, NULL,
328                             NFSCLSTATEMUTEXPTR, NULL);
329                 } else
330                         nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
331         }
332         NFSUNLOCKCLSTATE();
333         if (nowp != NULL)
334                 free(nowp, M_NFSCLOWNER);
335         if (nop != NULL)
336                 free(nop, M_NFSCLOPEN);
337         if (owpp != NULL)
338                 *owpp = owp;
339         if (opp != NULL)
340                 *opp = op;
341         return (0);
342 }
343
344 /*
345  * Create a new open, as required.
346  */
347 static void
348 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
349     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
350     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
351     struct ucred *cred, int *newonep)
352 {
353         struct nfsclowner *owp = *owpp, *nowp;
354         struct nfsclopen *op, *nop;
355
356         if (nowpp != NULL)
357                 nowp = *nowpp;
358         else
359                 nowp = NULL;
360         if (nopp != NULL)
361                 nop = *nopp;
362         else
363                 nop = NULL;
364         if (owp == NULL && nowp != NULL) {
365                 NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
366                 LIST_INIT(&nowp->nfsow_open);
367                 nowp->nfsow_clp = clp;
368                 nowp->nfsow_seqid = 0;
369                 nowp->nfsow_defunct = 0;
370                 nfscl_lockinit(&nowp->nfsow_rwlock);
371                 if (dp != NULL) {
372                         nfsstatsv1.cllocalopenowners++;
373                         LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
374                 } else {
375                         nfsstatsv1.clopenowners++;
376                         LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
377                 }
378                 owp = *owpp = nowp;
379                 *nowpp = NULL;
380                 if (newonep != NULL)
381                         *newonep = 1;
382         }
383
384          /* If an fhp has been specified, create an Open as well. */
385         if (fhp != NULL) {
386                 /* and look for the correct open, based upon FH */
387                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
388                         if (op->nfso_fhlen == fhlen &&
389                             !NFSBCMP(op->nfso_fh, fhp, fhlen))
390                                 break;
391                 }
392                 if (op == NULL && nop != NULL) {
393                         nop->nfso_own = owp;
394                         nop->nfso_mode = 0;
395                         nop->nfso_opencnt = 0;
396                         nop->nfso_posixlock = 1;
397                         nop->nfso_fhlen = fhlen;
398                         NFSBCOPY(fhp, nop->nfso_fh, fhlen);
399                         LIST_INIT(&nop->nfso_lock);
400                         nop->nfso_stateid.seqid = 0;
401                         nop->nfso_stateid.other[0] = 0;
402                         nop->nfso_stateid.other[1] = 0;
403                         nop->nfso_stateid.other[2] = 0;
404                         KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
405                         newnfs_copyincred(cred, &nop->nfso_cred);
406                         if (dp != NULL) {
407                                 TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
408                                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
409                                     nfsdl_list);
410                                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
411                                 nfsstatsv1.cllocalopens++;
412                         } else {
413                                 nfsstatsv1.clopens++;
414                         }
415                         LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
416                         *opp = nop;
417                         *nopp = NULL;
418                         if (newonep != NULL)
419                                 *newonep = 1;
420                 } else {
421                         *opp = op;
422                 }
423         }
424 }
425
426 /*
427  * Called to find/add a delegation to a client.
428  */
429 int
430 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
431     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
432 {
433         struct nfscldeleg *dp = *dpp, *tdp;
434
435         /*
436          * First, if we have received a Read delegation for a file on a
437          * read/write file system, just return it, because they aren't
438          * useful, imho.
439          */
440         if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
441             (dp->nfsdl_flags & NFSCLDL_READ)) {
442                 (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
443                 free(dp, M_NFSCLDELEG);
444                 *dpp = NULL;
445                 return (0);
446         }
447
448         /* Look for the correct deleg, based upon FH */
449         NFSLOCKCLSTATE();
450         tdp = nfscl_finddeleg(clp, nfhp, fhlen);
451         if (tdp == NULL) {
452                 if (dp == NULL) {
453                         NFSUNLOCKCLSTATE();
454                         return (NFSERR_BADSTATEID);
455                 }
456                 *dpp = NULL;
457                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
458                 LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
459                     nfsdl_hash);
460                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
461                 nfsstatsv1.cldelegates++;
462                 nfscl_delegcnt++;
463         } else {
464                 /*
465                  * Delegation already exists, what do we do if a new one??
466                  */
467                 if (dp != NULL) {
468                         printf("Deleg already exists!\n");
469                         free(dp, M_NFSCLDELEG);
470                         *dpp = NULL;
471                 } else {
472                         *dpp = tdp;
473                 }
474         }
475         NFSUNLOCKCLSTATE();
476         return (0);
477 }
478
479 /*
480  * Find a delegation for this file handle. Return NULL upon failure.
481  */
482 static struct nfscldeleg *
483 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
484 {
485         struct nfscldeleg *dp;
486
487         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
488             if (dp->nfsdl_fhlen == fhlen &&
489                 !NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
490                 break;
491         }
492         return (dp);
493 }
494
495 /*
496  * Get a stateid for an I/O operation. First, look for an open and iff
497  * found, return either a lockowner stateid or the open stateid.
498  * If no Open is found, just return error and the special stateid of all zeros.
499  */
500 int
501 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
502     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
503     void **lckpp)
504 {
505         struct nfsclclient *clp;
506         struct nfsclowner *owp;
507         struct nfsclopen *op = NULL, *top;
508         struct nfscllockowner *lp;
509         struct nfscldeleg *dp;
510         struct nfsnode *np;
511         struct nfsmount *nmp;
512         u_int8_t own[NFSV4CL_LOCKNAMELEN];
513         int error, done;
514
515         *lckpp = NULL;
516         /*
517          * Initially, just set the special stateid of all zeros.
518          * (Don't do this for a DS, since the special stateid can't be used.)
519          */
520         if (fords == 0) {
521                 stateidp->seqid = 0;
522                 stateidp->other[0] = 0;
523                 stateidp->other[1] = 0;
524                 stateidp->other[2] = 0;
525         }
526         if (vnode_vtype(vp) != VREG)
527                 return (EISDIR);
528         np = VTONFS(vp);
529         nmp = VFSTONFS(vnode_mount(vp));
530         NFSLOCKCLSTATE();
531         clp = nfscl_findcl(nmp);
532         if (clp == NULL) {
533                 NFSUNLOCKCLSTATE();
534                 return (EACCES);
535         }
536
537         /*
538          * Wait for recovery to complete.
539          */
540         while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
541                 (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
542                     PZERO, "nfsrecvr", NULL);
543
544         /*
545          * First, look for a delegation.
546          */
547         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
548                 if (dp->nfsdl_fhlen == fhlen &&
549                     !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
550                         if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
551                             (dp->nfsdl_flags & NFSCLDL_WRITE)) {
552                                 stateidp->seqid = dp->nfsdl_stateid.seqid;
553                                 stateidp->other[0] = dp->nfsdl_stateid.other[0];
554                                 stateidp->other[1] = dp->nfsdl_stateid.other[1];
555                                 stateidp->other[2] = dp->nfsdl_stateid.other[2];
556                                 if (!(np->n_flag & NDELEGRECALL)) {
557                                         TAILQ_REMOVE(&clp->nfsc_deleg, dp,
558                                             nfsdl_list);
559                                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
560                                             nfsdl_list);
561                                         dp->nfsdl_timestamp = NFSD_MONOSEC +
562                                             120;
563                                         dp->nfsdl_rwlock.nfslock_usecnt++;
564                                         *lckpp = (void *)&dp->nfsdl_rwlock;
565                                 }
566                                 NFSUNLOCKCLSTATE();
567                                 return (0);
568                         }
569                         break;
570                 }
571         }
572
573         if (p != NULL) {
574                 /*
575                  * If p != NULL, we want to search the parentage tree
576                  * for a matching OpenOwner and use that.
577                  */
578                 if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
579                         nfscl_filllockowner(NULL, own, F_POSIX);
580                 else
581                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
582                 lp = NULL;
583                 error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
584                     mode, &lp, &op);
585                 if (error == 0 && lp != NULL && fords == 0) {
586                         /* Don't return a lock stateid for a DS. */
587                         stateidp->seqid =
588                             lp->nfsl_stateid.seqid;
589                         stateidp->other[0] =
590                             lp->nfsl_stateid.other[0];
591                         stateidp->other[1] =
592                             lp->nfsl_stateid.other[1];
593                         stateidp->other[2] =
594                             lp->nfsl_stateid.other[2];
595                         NFSUNLOCKCLSTATE();
596                         return (0);
597                 }
598         }
599         if (op == NULL) {
600                 /* If not found, just look for any OpenOwner that will work. */
601                 top = NULL;
602                 done = 0;
603                 owp = LIST_FIRST(&clp->nfsc_owner);
604                 while (!done && owp != NULL) {
605                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
606                                 if (op->nfso_fhlen == fhlen &&
607                                     !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
608                                         if (top == NULL && (op->nfso_mode &
609                                             NFSV4OPEN_ACCESSWRITE) != 0 &&
610                                             (mode & NFSV4OPEN_ACCESSREAD) != 0)
611                                                 top = op;
612                                         if ((mode & op->nfso_mode) == mode) {
613                                                 done = 1;
614                                                 break;
615                                         }
616                                 }
617                         }
618                         if (!done)
619                                 owp = LIST_NEXT(owp, nfsow_list);
620                 }
621                 if (!done) {
622                         NFSCL_DEBUG(2, "openmode top=%p\n", top);
623                         if (top == NULL || NFSHASOPENMODE(nmp)) {
624                                 NFSUNLOCKCLSTATE();
625                                 return (ENOENT);
626                         } else
627                                 op = top;
628                 }
629                 /*
630                  * For read aheads or write behinds, use the open cred.
631                  * A read ahead or write behind is indicated by p == NULL.
632                  */
633                 if (p == NULL)
634                         newnfs_copycred(&op->nfso_cred, cred);
635         }
636
637         /*
638          * No lock stateid, so return the open stateid.
639          */
640         stateidp->seqid = op->nfso_stateid.seqid;
641         stateidp->other[0] = op->nfso_stateid.other[0];
642         stateidp->other[1] = op->nfso_stateid.other[1];
643         stateidp->other[2] = op->nfso_stateid.other[2];
644         NFSUNLOCKCLSTATE();
645         return (0);
646 }
647
648 /*
649  * Search for a matching file, mode and, optionally, lockowner.
650  */
651 static int
652 nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
653     u_int8_t *openown, u_int8_t *lockown, u_int32_t mode,
654     struct nfscllockowner **lpp, struct nfsclopen **opp)
655 {
656         struct nfsclowner *owp;
657         struct nfsclopen *op, *rop, *rop2;
658         struct nfscllockowner *lp;
659         int keep_looping;
660
661         if (lpp != NULL)
662                 *lpp = NULL;
663         /*
664          * rop will be set to the open to be returned. There are three
665          * variants of this, all for an open of the correct file:
666          * 1 - A match of lockown.
667          * 2 - A match of the openown, when no lockown match exists.
668          * 3 - A match for any open, if no openown or lockown match exists.
669          * Looking for #2 over #3 probably isn't necessary, but since
670          * RFC3530 is vague w.r.t. the relationship between openowners and
671          * lockowners, I think this is the safer way to go.
672          */
673         rop = NULL;
674         rop2 = NULL;
675         keep_looping = 1;
676         /* Search the client list */
677         owp = LIST_FIRST(ohp);
678         while (owp != NULL && keep_looping != 0) {
679                 /* and look for the correct open */
680                 op = LIST_FIRST(&owp->nfsow_open);
681                 while (op != NULL && keep_looping != 0) {
682                         if (op->nfso_fhlen == fhlen &&
683                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)
684                             && (op->nfso_mode & mode) == mode) {
685                                 if (lpp != NULL) {
686                                         /* Now look for a matching lockowner. */
687                                         LIST_FOREACH(lp, &op->nfso_lock,
688                                             nfsl_list) {
689                                                 if (!NFSBCMP(lp->nfsl_owner,
690                                                     lockown,
691                                                     NFSV4CL_LOCKNAMELEN)) {
692                                                         *lpp = lp;
693                                                         rop = op;
694                                                         keep_looping = 0;
695                                                         break;
696                                                 }
697                                         }
698                                 }
699                                 if (rop == NULL && !NFSBCMP(owp->nfsow_owner,
700                                     openown, NFSV4CL_LOCKNAMELEN)) {
701                                         rop = op;
702                                         if (lpp == NULL)
703                                                 keep_looping = 0;
704                                 }
705                                 if (rop2 == NULL)
706                                         rop2 = op;
707                         }
708                         op = LIST_NEXT(op, nfso_list);
709                 }
710                 owp = LIST_NEXT(owp, nfsow_list);
711         }
712         if (rop == NULL)
713                 rop = rop2;
714         if (rop == NULL)
715                 return (EBADF);
716         *opp = rop;
717         return (0);
718 }
719
720 /*
721  * Release use of an open owner. Called when open operations are done
722  * with the open owner.
723  */
724 void
725 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
726     __unused int error, __unused int candelete, int unlocked)
727 {
728
729         if (owp == NULL)
730                 return;
731         NFSLOCKCLSTATE();
732         if (unlocked == 0) {
733                 if (NFSHASONEOPENOWN(nmp))
734                         nfsv4_relref(&owp->nfsow_rwlock);
735                 else
736                         nfscl_lockunlock(&owp->nfsow_rwlock);
737         }
738         nfscl_clrelease(owp->nfsow_clp);
739         NFSUNLOCKCLSTATE();
740 }
741
742 /*
743  * Release use of an open structure under an open owner.
744  */
745 void
746 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
747     int candelete)
748 {
749         struct nfsclclient *clp;
750         struct nfsclowner *owp;
751
752         if (op == NULL)
753                 return;
754         NFSLOCKCLSTATE();
755         owp = op->nfso_own;
756         if (NFSHASONEOPENOWN(nmp))
757                 nfsv4_relref(&owp->nfsow_rwlock);
758         else
759                 nfscl_lockunlock(&owp->nfsow_rwlock);
760         clp = owp->nfsow_clp;
761         if (error && candelete && op->nfso_opencnt == 0)
762                 nfscl_freeopen(op, 0);
763         nfscl_clrelease(clp);
764         NFSUNLOCKCLSTATE();
765 }
766
767 /*
768  * Called to get a clientid structure. It will optionally lock the
769  * client data structures to do the SetClientId/SetClientId_confirm,
770  * but will release that lock and return the clientid with a reference
771  * count on it.
772  * If the "cred" argument is NULL, a new clientid should not be created.
773  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
774  * be done.
775  * The start_renewthread argument tells nfscl_getcl() to start a renew
776  * thread if this creates a new clp.
     * The firstref argument makes a shared reference request wait behind a
     * pending exclusive lock request before the reference is taken.
777  * It always returns clpp with a reference count on it, unless returning
     * an error.
     * Return values:
     *   0      - success, *clpp has been set.
     *   EBADF  - a forced dismount of "mp" is in progress.
     *   EACCES - no clientid structure exists and "cred" is NULL, or a
     *            SetClientID is required but "p" or "cred" is NULL.
     *   other  - the error from nfsrpc_setclient() once retries are done.
778  */
779 int
780 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
781     int start_renewthread, bool firstref, struct nfsclclient **clpp)
782 {
783         struct nfsclclient *clp;
784         struct nfsclclient *newclp = NULL;
785         struct nfsmount *nmp;
786         char uuid[HOSTUUIDLEN];
787         int igotlock = 0, error, trystalecnt, clidinusedelay, i;
788         u_int16_t idlen = 0;
789
790         nmp = VFSTONFS(mp);
            /*
             * With a credential, preallocate a candidate client structure
             * before the state lock is taken. The id length is the host
             * uuid length plus a u_int64_t, or a u_int64_t plus 16 bytes
             * when the host has no uuid.
             * NOTE(review): the "- 1" in the size presumably accounts for a
             * one byte nfsc_id[] array at the end of struct nfsclclient;
             * confirm against the structure definition.
             */
791         if (cred != NULL) {
792                 getcredhostuuid(cred, uuid, sizeof uuid);
793                 idlen = strlen(uuid);
794                 if (idlen > 0)
795                         idlen += sizeof (u_int64_t);
796                 else
797                         idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
798                 newclp = malloc(
799                     sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
800                     M_WAITOK | M_ZERO);
801         }
802         NFSLOCKCLSTATE();
803         /*
804          * If a forced dismount is already in progress, don't
805          * allocate a new clientid and get out now. For the case where
806          * clp != NULL, this is a harmless optimization.
807          */
808         if (NFSCL_FORCEDISM(mp)) {
809                 NFSUNLOCKCLSTATE();
810                 if (newclp != NULL)
811                         free(newclp, M_NFSCLCLIENT);
812                 return (EBADF);
813         }
814         clp = nmp->nm_clp;
815         if (clp == NULL) {
                    /* Nothing preallocated means cred was NULL: give up. */
816                 if (newclp == NULL) {
817                         NFSUNLOCKCLSTATE();
818                         return (EACCES);
819                 }
                    /*
                     * First use for this mount: initialize the preallocated
                     * structure, then link it onto nfsclhead and into the
                     * mount before the state lock is dropped.
                     */
820                 clp = newclp;
821                 clp->nfsc_idlen = idlen;
822                 LIST_INIT(&clp->nfsc_owner);
823                 TAILQ_INIT(&clp->nfsc_deleg);
824                 TAILQ_INIT(&clp->nfsc_layout);
825                 LIST_INIT(&clp->nfsc_devinfo);
826                 for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
827                         LIST_INIT(&clp->nfsc_deleghash[i]);
828                 for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
829                         LIST_INIT(&clp->nfsc_layouthash[i]);
830                 clp->nfsc_flags = NFSCLFLAGS_INITED;
831                 clp->nfsc_clientidrev = 1;
832                 clp->nfsc_cbident = nfscl_nextcbident();
833                 nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
834                     clp->nfsc_idlen);
835                 LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
836                 nmp->nm_clp = clp;
837                 clp->nfsc_nmp = nmp;
838                 NFSUNLOCKCLSTATE();
                    /* The renew thread is started without the state lock held. */
839                 if (start_renewthread != 0)
840                         nfscl_start_renewthread(clp);
841         } else {
                    /* The mount already has one; discard the preallocation. */
842                 NFSUNLOCKCLSTATE();
843                 if (newclp != NULL)
844                         free(newclp, M_NFSCLCLIENT);
845         }
846         NFSLOCKCLSTATE();
            /*
             * While no clientid has been acquired, try for the exclusive
             * nfsc_lock. The loop ends once the lock is obtained, the
             * NFSCLFLAGS_HASCLIENTID flag becomes set, or a forced dismount
             * starts.
             */
847         while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
848             !NFSCL_FORCEDISM(mp))
849                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
850                     NFSCLSTATEMUTEXPTR, mp);
851         if (igotlock == 0) {
852                 /*
853                  * Call nfsv4_lock() with "iwantlock == 0" on the firstref so
854                  * that it will wait for a pending exclusive lock request.
855                  * This gives the exclusive lock request priority over this
856                  * shared lock request.
857                  * An exclusive lock on nfsc_lock is used mainly for server
858                  * crash recoveries and delegation recalls.
859                  */
860                 if (firstref)
861                         nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR,
862                             mp);
863                 nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
864         }
865         if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
866                 /*
867                  * Both nfsv4_lock() and nfsv4_getref() know to check
868                  * for NFSCL_FORCEDISM() and return without sleeping to
869                  * wait for the exclusive lock to be released, since it
870                  * might be held by nfscl_umount() and we need to get out
871                  * now for that case and not wait until nfscl_umount()
872                  * releases it.
873                  */
874                 NFSUNLOCKCLSTATE();
875                 return (EBADF);
876         }
877         NFSUNLOCKCLSTATE();
878
879         /*
880          * If it needs a clientid, do the setclientid now.
881          */
882         if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
                    /* Reaching here without the exclusive lock is a bug. */
883                 if (!igotlock)
884                         panic("nfscl_clget");
885                 if (p == NULL || cred == NULL) {
886                         NFSLOCKCLSTATE();
887                         nfsv4_unlock(&clp->nfsc_lock, 0);
888                         NFSUNLOCKCLSTATE();
889                         return (EACCES);
890                 }
891                 /*
892                  * If RFC3530 Sec. 14.2.33 is taken literally,
893                  * NFSERR_CLIDINUSE will be returned persistently for the
894                  * case where a new mount of the same file system is using
895                  * a different principal. In practice, NFSERR_CLIDINUSE is
896                  * only returned when there is outstanding unexpired state
897                  * on the clientid. As such, try for twice the lease
898                  * interval, if we know what that is. Otherwise, make a
899                  * wild ass guess.
900                  * The case of returning NFSERR_STALECLIENTID is far less
901                  * likely, but might occur if there is a significant delay
902                  * between doing the SetClientID and SetClientIDConfirm Ops,
903                  * such that the server throws away the clientid before
904                  * receiving the SetClientIDConfirm.
905                  */
906                 if (clp->nfsc_renew > 0)
907                         clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
908                 else
909                         clidinusedelay = 120;
910                 trystalecnt = 3;
                    /*
                     * Stale clientid/bad session errors get at most
                     * trystalecnt attempts and NFSERR_CLIDINUSE at most
                     * clidinusedelay attempts, with an nfs_catnap() call
                     * after each such failure.
                     */
911                 do {
912                         error = nfsrpc_setclient(nmp, clp, 0, cred, p);
913                         if (error == NFSERR_STALECLIENTID ||
914                             error == NFSERR_STALEDONTRECOVER ||
915                             error == NFSERR_BADSESSION ||
916                             error == NFSERR_CLIDINUSE) {
917                                 (void) nfs_catnap(PZERO, error, "nfs_setcl");
918                         }
919                 } while (((error == NFSERR_STALECLIENTID ||
920                      error == NFSERR_BADSESSION ||
921                      error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
922                     (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
923                 if (error) {
924                         NFSLOCKCLSTATE();
925                         nfsv4_unlock(&clp->nfsc_lock, 0);
926                         NFSUNLOCKCLSTATE();
927                         return (error);
928                 }
929                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
930         }
            /*
             * NOTE(review): the exclusive lock is dropped with a second
             * argument of 1 here but 0 on the error paths above; this
             * presumably selects whether a reference is left on nfsc_lock
             * for the caller. Confirm against nfsv4_unlock().
             */
931         if (igotlock) {
932                 NFSLOCKCLSTATE();
933                 nfsv4_unlock(&clp->nfsc_lock, 1);
934                 NFSUNLOCKCLSTATE();
935         }
936
937         *clpp = clp;
938         return (0);
939 }
940
941 /*
942  * Get a reference to a clientid and return it, if valid.
943  */
944 struct nfsclclient *
945 nfscl_findcl(struct nfsmount *nmp)
946 {
947         struct nfsclclient *clp;
948
949         clp = nmp->nm_clp;
950         if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
951                 return (NULL);
952         return (clp);
953 }
954
955 /*
956  * Release the clientid structure. It may be locked or reference counted.
957  */
958 static void
959 nfscl_clrelease(struct nfsclclient *clp)
960 {
961
962         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
963                 nfsv4_unlock(&clp->nfsc_lock, 0);
964         else
965                 nfsv4_relref(&clp->nfsc_lock);
966 }
967
/*
 * Externally callable form of nfscl_clrelease(): brackets the release of
 * the clientid structure with NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE().
 */
void
nfscl_clientrelease(struct nfsclclient *clp)
{

        NFSLOCKCLSTATE();
        nfscl_clrelease(clp);
        NFSUNLOCKCLSTATE();
}
982
983 /*
984  * Called when wanting to lock a byte region.
     * The range starts at "off" and covers "len" bytes; a "len" of
     * NFS64BITSSET sets the end of the range to NFS64BITSSET.
     * When "recovery" is non-zero, "rclp", "rownp" and "ropenownp" supply
     * the clientid structure, lock owner name and open owner name, and the
     * state lock is neither taken nor released here. Otherwise the clientid
     * is acquired with nfscl_getcl() and the owner names are built from
     * "id", "flags" and "p".
     * On success, *lpp is the lock owner, *newonep is 1 if that lock owner
     * was created by this call and *donelocallyp is 1 if the lock was
     * handled locally (against a delegation, or because nfscl_updatelock()
     * returned 0).
     * For the non-recovery case, when the lock was not done locally the
     * lock owner's nfsl_rwlock is held exclusively on return and the hold
     * on the clientid is kept; when it was done locally the clientid is
     * released before returning.
     * Returns 0 on success, NFSERR_INVAL when "off" + "len" is not greater
     * than "off", or the error from nfscl_getcl(), nfscl_getopen() or
     * nfscl_localconflict().
985  */
986 int
987 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
988     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
989     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
990     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
991 {
992         struct nfscllockowner *lp;
993         struct nfsclopen *op;
994         struct nfsclclient *clp;
995         struct nfscllockowner *nlp;
996         struct nfscllock *nlop, *otherlop;
997         struct nfscldeleg *dp = NULL, *ldp = NULL;
998         struct nfscllockownerhead *lhp = NULL;
999         struct nfsnode *np;
1000         u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1001         u_int8_t *openownp;
1002         int error = 0, ret, donelocally = 0;
1003         u_int32_t mode;
1004
1005         /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1006         mode = 0;
1007         np = VTONFS(vp);
1008         *lpp = NULL;
1009         lp = NULL;
1010         *newonep = 0;
1011         *donelocallyp = 0;
1012
1013         /*
1014          * Might need these, so MALLOC them now, to
1015          * avoid a tsleep() in MALLOC later.
1016          */
1017         nlp = malloc(
1018             sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1019         otherlop = malloc(
1020             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1021         nlop = malloc(
1022             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1023         nlop->nfslo_type = type;
1024         nlop->nfslo_first = off;
1025         if (len == NFS64BITSSET) {
1026                 nlop->nfslo_end = NFS64BITSSET;
1027         } else {
1028                 nlop->nfslo_end = off + len;
                     /* An end at or below the start is rejected. */
1029                 if (nlop->nfslo_end <= nlop->nfslo_first)
1030                         error = NFSERR_INVAL;
1031         }
1032
1033         if (!error) {
1034                 if (recovery)
1035                         clp = rclp;
1036                 else
1037                         error = nfscl_getcl(vnode_mount(vp), cred, p, 1, true,
1038                             &clp);
1039         }
1040         if (error) {
1041                 free(nlp, M_NFSCLLOCKOWNER);
1042                 free(otherlop, M_NFSCLLOCK);
1043                 free(nlop, M_NFSCLLOCK);
1044                 return (error);
1045         }
1046
1047         op = NULL;
             /*
              * Select the lock owner and open owner names. For a mount with
              * NFSHASONEOPENOWN() the open owner name is built from a NULL
              * id, otherwise from the calling process.
              */
1048         if (recovery) {
1049                 ownp = rownp;
1050                 openownp = ropenownp;
1051         } else {
1052                 nfscl_filllockowner(id, own, flags);
1053                 ownp = own;
1054                 if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
1055                         nfscl_filllockowner(NULL, openown, F_POSIX);
1056                 else
1057                         nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1058                 openownp = openown;
1059         }
             /*
              * NOTE(review): the state lock is only taken here for the
              * non-recovery case; the recovery caller presumably already
              * holds whatever locking is needed. Confirm against callers.
              */
1060         if (!recovery) {
1061                 NFSLOCKCLSTATE();
1062                 /*
1063                  * First, search for a delegation. If one exists for this file,
1064                  * the lock can be done locally against it, so long as there
1065                  * isn't a local lock conflict.
1066                  */
1067                 ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1068                     np->n_fhp->nfh_len);
1069                 /* Just sanity check for correct type of delegation */
1070                 if (dp != NULL && ((dp->nfsdl_flags &
1071                     (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1072                      (type == F_WRLCK &&
1073                       (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1074                         dp = NULL;
1075         }
1076         if (dp != NULL) {
1077                 /* Now, find an open and maybe a lockowner. */
1078                 ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh,
1079                     np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1080                 if (ret)
1081                         ret = nfscl_getopen(&clp->nfsc_owner,
1082                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1083                             ownp, mode, NULL, &op);
1084                 if (!ret) {
                             /*
                              * Lock against the delegation: move it to the
                              * head of the client's delegation list and
                              * set its timestamp to NFSD_MONOSEC + 120.
                              */
1085                         lhp = &dp->nfsdl_lock;
1086                         TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1087                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1088                         dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1089                         donelocally = 1;
1090                 } else {
1091                         dp = NULL;
1092                 }
1093         }
1094         if (!donelocally) {
1095                 /*
1096                  * Get the related Open and maybe lockowner.
1097                  */
1098                 error = nfscl_getopen(&clp->nfsc_owner,
1099                     np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1100                     ownp, mode, &lp, &op);
1101                 if (!error)
1102                         lhp = &op->nfso_lock;
1103         }
             /*
              * The conflict check is given ldp, the delegation as originally
              * found, even if dp was cleared above.
              */
1104         if (!error && !recovery)
1105                 error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1106                     np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1107         if (error) {
1108                 if (!recovery) {
1109                         nfscl_clrelease(clp);
1110                         NFSUNLOCKCLSTATE();
1111                 }
1112                 free(nlp, M_NFSCLLOCKOWNER);
1113                 free(otherlop, M_NFSCLLOCK);
1114                 free(nlop, M_NFSCLLOCK);
1115                 return (error);
1116         }
1117
1118         /*
1119          * Ok, see if a lockowner exists and create one, as required.
1120          */
1121         if (lp == NULL)
1122                 LIST_FOREACH(lp, lhp, nfsl_list) {
1123                         if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1124                                 break;
1125                 }
1126         if (lp == NULL) {
                     /* None found: fill in the preallocated lock owner. */
1127                 NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1128                 if (recovery)
1129                         NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1130                             NFSV4CL_LOCKNAMELEN);
1131                 else
1132                         NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1133                             NFSV4CL_LOCKNAMELEN);
1134                 nlp->nfsl_seqid = 0;
1135                 nlp->nfsl_lockflags = flags;
1136                 nlp->nfsl_inprog = NULL;
1137                 nfscl_lockinit(&nlp->nfsl_rwlock);
1138                 LIST_INIT(&nlp->nfsl_lock);
                     /* A locally handled lock owner has no associated open. */
1139                 if (donelocally) {
1140                         nlp->nfsl_open = NULL;
1141                         nfsstatsv1.cllocallockowners++;
1142                 } else {
1143                         nlp->nfsl_open = op;
1144                         nfsstatsv1.cllockowners++;
1145                 }
1146                 LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1147                 lp = nlp;
                     /* Now on the list, so it must not be freed below. */
1148                 nlp = NULL;
1149                 *newonep = 1;
1150         }
1151
1152         /*
1153          * Now, update the byte ranges for locks.
1154          */
1155         ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
             /* A zero return is treated the same as a locally done lock. */
1156         if (!ret)
1157                 donelocally = 1;
1158         if (donelocally) {
1159                 *donelocallyp = 1;
1160                 if (!recovery)
1161                         nfscl_clrelease(clp);
1162         } else {
1163                 /*
1164                  * Serial modifications on the lock owner for multiple threads
1165                  * for the same process using a read/write lock.
1166                  */
1167                 if (!recovery)
1168                         nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1169         }
1170         if (!recovery)
1171                 NFSUNLOCKCLSTATE();
1172
             /*
              * Free whichever preallocated structures are still owned here.
              * NOTE(review): nlop and otherlop are passed by address to
              * nfscl_updatelock(), which presumably sets them to NULL when
              * it keeps them. Confirm against nfscl_updatelock().
              */
1173         if (nlp)
1174                 free(nlp, M_NFSCLLOCKOWNER);
1175         if (nlop)
1176                 free(nlop, M_NFSCLLOCK);
1177         if (otherlop)
1178                 free(otherlop, M_NFSCLLOCK);
1179
1180         *lpp = lp;
1181         return (0);
1182 }
1183
1184 /*
1185  * Called to unlock a byte range, for LockU.
1186  */
1187 int
1188 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1189     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1190     struct nfsclclient *clp, void *id, int flags,
1191     struct nfscllockowner **lpp, int *dorpcp)
1192 {
1193         struct nfscllockowner *lp;
1194         struct nfsclowner *owp;
1195         struct nfsclopen *op;
1196         struct nfscllock *nlop, *other_lop = NULL;
1197         struct nfscldeleg *dp;
1198         struct nfsnode *np;
1199         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1200         int ret = 0, fnd;
1201
1202         np = VTONFS(vp);
1203         *lpp = NULL;
1204         *dorpcp = 0;
1205
1206         /*
1207          * Might need these, so MALLOC them now, to
1208          * avoid a tsleep() in MALLOC later.
1209          */
1210         nlop = malloc(
1211             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1212         nlop->nfslo_type = F_UNLCK;
1213         nlop->nfslo_first = off;
1214         if (len == NFS64BITSSET) {
1215                 nlop->nfslo_end = NFS64BITSSET;
1216         } else {
1217                 nlop->nfslo_end = off + len;
1218                 if (nlop->nfslo_end <= nlop->nfslo_first) {
1219                         free(nlop, M_NFSCLLOCK);
1220                         return (NFSERR_INVAL);
1221                 }
1222         }
1223         if (callcnt == 0) {
1224                 other_lop = malloc(
1225                     sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1226                 *other_lop = *nlop;
1227         }
1228         nfscl_filllockowner(id, own, flags);
1229         dp = NULL;
1230         NFSLOCKCLSTATE();
1231         if (callcnt == 0)
1232                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1233                     np->n_fhp->nfh_len);
1234
1235         /*
1236          * First, unlock any local regions on a delegation.
1237          */
1238         if (dp != NULL) {
1239                 /* Look for this lockowner. */
1240                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1241                         if (!NFSBCMP(lp->nfsl_owner, own,
1242                             NFSV4CL_LOCKNAMELEN))
1243                                 break;
1244                 }
1245                 if (lp != NULL)
1246                         /* Use other_lop, so nlop is still available */
1247                         (void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1248         }
1249
1250         /*
1251          * Now, find a matching open/lockowner that hasn't already been done,
1252          * as marked by nfsl_inprog.
1253          */
1254         lp = NULL;
1255         fnd = 0;
1256         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1257             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1258                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1259                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1260                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1261                         if (lp->nfsl_inprog == NULL &&
1262                             !NFSBCMP(lp->nfsl_owner, own,
1263                              NFSV4CL_LOCKNAMELEN)) {
1264                                 fnd = 1;
1265                                 break;
1266                         }
1267                     }
1268                     if (fnd)
1269                         break;
1270                 }
1271             }
1272             if (fnd)
1273                 break;
1274         }
1275
1276         if (lp != NULL) {
1277                 ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1278                 if (ret)
1279                         *dorpcp = 1;
1280                 /*
1281                  * Serial modifications on the lock owner for multiple
1282                  * threads for the same process using a read/write lock.
1283                  */
1284                 lp->nfsl_inprog = p;
1285                 nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1286                 *lpp = lp;
1287         }
1288         NFSUNLOCKCLSTATE();
1289         if (nlop)
1290                 free(nlop, M_NFSCLLOCK);
1291         if (other_lop)
1292                 free(other_lop, M_NFSCLLOCK);
1293         return (0);
1294 }
1295
1296 /*
1297  * Release all lockowners marked in progess for this process and file.
1298  */
1299 void
1300 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1301     void *id, int flags)
1302 {
1303         struct nfsclowner *owp;
1304         struct nfsclopen *op;
1305         struct nfscllockowner *lp;
1306         struct nfsnode *np;
1307         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1308
1309         np = VTONFS(vp);
1310         nfscl_filllockowner(id, own, flags);
1311         NFSLOCKCLSTATE();
1312         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1313             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1314                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1315                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1316                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1317                         if (lp->nfsl_inprog == p &&
1318                             !NFSBCMP(lp->nfsl_owner, own,
1319                             NFSV4CL_LOCKNAMELEN)) {
1320                             lp->nfsl_inprog = NULL;
1321                             nfscl_lockunlock(&lp->nfsl_rwlock);
1322                         }
1323                     }
1324                 }
1325             }
1326         }
1327         nfscl_clrelease(clp);
1328         NFSUNLOCKCLSTATE();
1329 }
1330
1331 /*
1332  * Called to find out if any bytes within the byte range specified are
1333  * write locked by the calling process. Used to determine if flushing
1334  * is required before a LockU.
1335  * If in doubt, return 1, so the flush will occur.
1336  */
1337 int
1338 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1339     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1340 {
1341         struct nfsclowner *owp;
1342         struct nfscllockowner *lp;
1343         struct nfsclopen *op;
1344         struct nfsclclient *clp;
1345         struct nfscllock *lop;
1346         struct nfscldeleg *dp;
1347         struct nfsnode *np;
1348         u_int64_t off, end;
1349         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1350         int error = 0;
1351
1352         np = VTONFS(vp);
1353         switch (fl->l_whence) {
1354         case SEEK_SET:
1355         case SEEK_CUR:
1356                 /*
1357                  * Caller is responsible for adding any necessary offset
1358                  * when SEEK_CUR is used.
1359                  */
1360                 off = fl->l_start;
1361                 break;
1362         case SEEK_END:
1363                 off = np->n_size + fl->l_start;
1364                 break;
1365         default:
1366                 return (1);
1367         }
1368         if (fl->l_len != 0) {
1369                 end = off + fl->l_len;
1370                 if (end < off)
1371                         return (1);
1372         } else {
1373                 end = NFS64BITSSET;
1374         }
1375
1376         error = nfscl_getcl(vnode_mount(vp), cred, p, 1, true, &clp);
1377         if (error)
1378                 return (1);
1379         nfscl_filllockowner(id, own, flags);
1380         NFSLOCKCLSTATE();
1381
1382         /*
1383          * First check the delegation locks.
1384          */
1385         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1386         if (dp != NULL) {
1387                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1388                         if (!NFSBCMP(lp->nfsl_owner, own,
1389                             NFSV4CL_LOCKNAMELEN))
1390                                 break;
1391                 }
1392                 if (lp != NULL) {
1393                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1394                                 if (lop->nfslo_first >= end)
1395                                         break;
1396                                 if (lop->nfslo_end <= off)
1397                                         continue;
1398                                 if (lop->nfslo_type == F_WRLCK) {
1399                                         nfscl_clrelease(clp);
1400                                         NFSUNLOCKCLSTATE();
1401                                         return (1);
1402                                 }
1403                         }
1404                 }
1405         }
1406
1407         /*
1408          * Now, check state against the server.
1409          */
1410         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1411             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1412                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1413                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1414                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1415                         if (!NFSBCMP(lp->nfsl_owner, own,
1416                             NFSV4CL_LOCKNAMELEN))
1417                             break;
1418                     }
1419                     if (lp != NULL) {
1420                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1421                             if (lop->nfslo_first >= end)
1422                                 break;
1423                             if (lop->nfslo_end <= off)
1424                                 continue;
1425                             if (lop->nfslo_type == F_WRLCK) {
1426                                 nfscl_clrelease(clp);
1427                                 NFSUNLOCKCLSTATE();
1428                                 return (1);
1429                             }
1430                         }
1431                     }
1432                 }
1433             }
1434         }
1435         nfscl_clrelease(clp);
1436         NFSUNLOCKCLSTATE();
1437         return (0);
1438 }
1439
1440 /*
1441  * Release a byte range lock owner structure.
1442  */
1443 void
1444 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1445 {
1446         struct nfsclclient *clp;
1447
1448         if (lp == NULL)
1449                 return;
1450         NFSLOCKCLSTATE();
1451         clp = lp->nfsl_open->nfso_own->nfsow_clp;
1452         if (error != 0 && candelete &&
1453             (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1454                 nfscl_freelockowner(lp, 0);
1455         else
1456                 nfscl_lockunlock(&lp->nfsl_rwlock);
1457         nfscl_clrelease(clp);
1458         NFSUNLOCKCLSTATE();
1459 }
1460
1461 /*
1462  * Free up an open structure and any associated byte range lock structures.
1463  */
1464 void
1465 nfscl_freeopen(struct nfsclopen *op, int local)
1466 {
1467
1468         LIST_REMOVE(op, nfso_list);
1469         nfscl_freealllocks(&op->nfso_lock, local);
1470         free(op, M_NFSCLOPEN);
1471         if (local)
1472                 nfsstatsv1.cllocalopens--;
1473         else
1474                 nfsstatsv1.clopens--;
1475 }
1476
1477 /*
1478  * Free up all lock owners and associated locks.
1479  */
1480 static void
1481 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1482 {
1483         struct nfscllockowner *lp, *nlp;
1484
1485         LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1486                 if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1487                         panic("nfscllckw");
1488                 nfscl_freelockowner(lp, local);
1489         }
1490 }
1491
1492 /*
1493  * Called for an Open when NFSERR_EXPIRED is received from the server.
1494  * If there are no byte range locks nor a Share Deny lost, try to do a
1495  * fresh Open. Otherwise, free the open.
1496  */
1497 static int
1498 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1499     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1500 {
1501         struct nfscllockowner *lp;
1502         struct nfscldeleg *dp;
1503         int mustdelete = 0, error;
1504
1505         /*
1506          * Look for any byte range lock(s).
1507          */
1508         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1509                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
1510                         mustdelete = 1;
1511                         break;
1512                 }
1513         }
1514
1515         /*
1516          * If no byte range lock(s) nor a Share deny, try to re-open.
1517          */
1518         if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1519                 newnfs_copycred(&op->nfso_cred, cred);
1520                 dp = NULL;
1521                 error = nfsrpc_reopen(nmp, op->nfso_fh,
1522                     op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1523                 if (error) {
1524                         mustdelete = 1;
1525                         if (dp != NULL) {
1526                                 free(dp, M_NFSCLDELEG);
1527                                 dp = NULL;
1528                         }
1529                 }
1530                 if (dp != NULL)
1531                         nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1532                             op->nfso_fhlen, cred, p, &dp);
1533         }
1534
1535         /*
1536          * If a byte range lock or Share deny or couldn't re-open, free it.
1537          */
1538         if (mustdelete)
1539                 nfscl_freeopen(op, 0);
1540         return (mustdelete);
1541 }
1542
1543 /*
1544  * Free up an open owner structure.
1545  */
1546 static void
1547 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1548 {
1549
1550         LIST_REMOVE(owp, nfsow_list);
1551         free(owp, M_NFSCLOWNER);
1552         if (local)
1553                 nfsstatsv1.cllocalopenowners--;
1554         else
1555                 nfsstatsv1.clopenowners--;
1556 }
1557
1558 /*
1559  * Free up a byte range lock owner structure.
1560  */
1561 void
1562 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1563 {
1564         struct nfscllock *lop, *nlop;
1565
1566         LIST_REMOVE(lp, nfsl_list);
1567         LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1568                 nfscl_freelock(lop, local);
1569         }
1570         free(lp, M_NFSCLLOCKOWNER);
1571         if (local)
1572                 nfsstatsv1.cllocallockowners--;
1573         else
1574                 nfsstatsv1.cllockowners--;
1575 }
1576
1577 /*
1578  * Free up a byte range lock structure.
1579  */
1580 void
1581 nfscl_freelock(struct nfscllock *lop, int local)
1582 {
1583
1584         LIST_REMOVE(lop, nfslo_list);
1585         free(lop, M_NFSCLLOCK);
1586         if (local)
1587                 nfsstatsv1.cllocallocks--;
1588         else
1589                 nfsstatsv1.cllocks--;
1590 }
1591
1592 /*
1593  * Clean out the state related to a delegation.
1594  */
1595 static void
1596 nfscl_cleandeleg(struct nfscldeleg *dp)
1597 {
1598         struct nfsclowner *owp, *nowp;
1599         struct nfsclopen *op;
1600
1601         LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1602                 op = LIST_FIRST(&owp->nfsow_open);
1603                 if (op != NULL) {
1604                         if (LIST_NEXT(op, nfso_list) != NULL)
1605                                 panic("nfscleandel");
1606                         nfscl_freeopen(op, 1);
1607                 }
1608                 nfscl_freeopenowner(owp, 1);
1609         }
1610         nfscl_freealllocks(&dp->nfsdl_lock, 1);
1611 }
1612
1613 /*
1614  * Free a delegation.
1615  */
1616 static void
1617 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1618 {
1619
1620         TAILQ_REMOVE(hdp, dp, nfsdl_list);
1621         LIST_REMOVE(dp, nfsdl_hash);
1622         if (freeit)
1623                 free(dp, M_NFSCLDELEG);
1624         nfsstatsv1.cldelegates--;
1625         nfscl_delegcnt--;
1626 }
1627
1628 /*
1629  * Free up all state related to this client structure.
1630  */
1631 static void
1632 nfscl_cleanclient(struct nfsclclient *clp)
1633 {
1634         struct nfsclowner *owp, *nowp;
1635         struct nfsclopen *op, *nop;
1636         struct nfscllayout *lyp, *nlyp;
1637         struct nfscldevinfo *dip, *ndip;
1638
1639         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1640                 nfscl_freelayout(lyp);
1641
1642         LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1643                 nfscl_freedevinfo(dip);
1644
1645         /* Now, all the OpenOwners, etc. */
1646         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1647                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1648                         nfscl_freeopen(op, 0);
1649                 }
1650                 nfscl_freeopenowner(owp, 0);
1651         }
1652 }
1653
1654 /*
1655  * Called when an NFSERR_EXPIRED is received from the server.
1656  */
1657 static void
1658 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1659     struct ucred *cred, NFSPROC_T *p)
1660 {
1661         struct nfsclowner *owp, *nowp, *towp;
1662         struct nfsclopen *op, *nop, *top;
1663         struct nfscldeleg *dp, *ndp;
1664         int ret, printed = 0;
1665
1666         /*
1667          * First, merge locally issued Opens into the list for the server.
1668          */
1669         dp = TAILQ_FIRST(&clp->nfsc_deleg);
1670         while (dp != NULL) {
1671             ndp = TAILQ_NEXT(dp, nfsdl_list);
1672             owp = LIST_FIRST(&dp->nfsdl_owner);
1673             while (owp != NULL) {
1674                 nowp = LIST_NEXT(owp, nfsow_list);
1675                 op = LIST_FIRST(&owp->nfsow_open);
1676                 if (op != NULL) {
1677                     if (LIST_NEXT(op, nfso_list) != NULL)
1678                         panic("nfsclexp");
1679                     LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1680                         if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1681                             NFSV4CL_LOCKNAMELEN))
1682                             break;
1683                     }
1684                     if (towp != NULL) {
1685                         /* Merge opens in */
1686                         LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1687                             if (top->nfso_fhlen == op->nfso_fhlen &&
1688                                 !NFSBCMP(top->nfso_fh, op->nfso_fh,
1689                                  op->nfso_fhlen)) {
1690                                 top->nfso_mode |= op->nfso_mode;
1691                                 top->nfso_opencnt += op->nfso_opencnt;
1692                                 break;
1693                             }
1694                         }
1695                         if (top == NULL) {
1696                             /* Just add the open to the owner list */
1697                             LIST_REMOVE(op, nfso_list);
1698                             op->nfso_own = towp;
1699                             LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1700                             nfsstatsv1.cllocalopens--;
1701                             nfsstatsv1.clopens++;
1702                         }
1703                     } else {
1704                         /* Just add the openowner to the client list */
1705                         LIST_REMOVE(owp, nfsow_list);
1706                         owp->nfsow_clp = clp;
1707                         LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1708                         nfsstatsv1.cllocalopenowners--;
1709                         nfsstatsv1.clopenowners++;
1710                         nfsstatsv1.cllocalopens--;
1711                         nfsstatsv1.clopens++;
1712                     }
1713                 }
1714                 owp = nowp;
1715             }
1716             if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1717                 printed = 1;
1718                 printf("nfsv4 expired locks lost\n");
1719             }
1720             nfscl_cleandeleg(dp);
1721             nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1722             dp = ndp;
1723         }
1724         if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1725             panic("nfsclexp");
1726
1727         /*
1728          * Now, try and reopen against the server.
1729          */
1730         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1731                 owp->nfsow_seqid = 0;
1732                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1733                         ret = nfscl_expireopen(clp, op, nmp, cred, p);
1734                         if (ret && !printed) {
1735                                 printed = 1;
1736                                 printf("nfsv4 expired locks lost\n");
1737                         }
1738                 }
1739                 if (LIST_EMPTY(&owp->nfsow_open))
1740                         nfscl_freeopenowner(owp, 0);
1741         }
1742 }
1743
1744 /*
1745  * This function must be called after the process represented by "own" has
1746  * exited. Must be called with CLSTATE lock held.
1747  */
1748 static void
1749 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1750 {
1751         struct nfsclowner *owp, *nowp;
1752         struct nfscllockowner *lp, *nlp;
1753         struct nfscldeleg *dp;
1754
1755         /* First, get rid of local locks on delegations. */
1756         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1757                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1758                     if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1759                         if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1760                             panic("nfscllckw");
1761                         nfscl_freelockowner(lp, 1);
1762                     }
1763                 }
1764         }
1765         owp = LIST_FIRST(&clp->nfsc_owner);
1766         while (owp != NULL) {
1767                 nowp = LIST_NEXT(owp, nfsow_list);
1768                 if (!NFSBCMP(owp->nfsow_owner, own,
1769                     NFSV4CL_LOCKNAMELEN)) {
1770                         /*
1771                          * If there are children that haven't closed the
1772                          * file descriptors yet, the opens will still be
1773                          * here. For that case, let the renew thread clear
1774                          * out the OpenOwner later.
1775                          */
1776                         if (LIST_EMPTY(&owp->nfsow_open))
1777                                 nfscl_freeopenowner(owp, 0);
1778                         else
1779                                 owp->nfsow_defunct = 1;
1780                 }
1781                 owp = nowp;
1782         }
1783 }
1784
1785 /*
1786  * Find open/lock owners for processes that have exited.
1787  */
1788 static void
1789 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1790 {
1791         struct nfsclowner *owp, *nowp;
1792         struct nfsclopen *op;
1793         struct nfscllockowner *lp, *nlp;
1794         struct nfscldeleg *dp;
1795
1796         NFSPROCLISTLOCK();
1797         NFSLOCKCLSTATE();
1798         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1799                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1800                         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1801                                 if (LIST_EMPTY(&lp->nfsl_lock))
1802                                         nfscl_emptylockowner(lp, lhp);
1803                         }
1804                 }
1805                 if (nfscl_procdoesntexist(owp->nfsow_owner))
1806                         nfscl_cleanup_common(clp, owp->nfsow_owner);
1807         }
1808
1809         /*
1810          * For the single open_owner case, these lock owners need to be
1811          * checked to see if they still exist separately.
1812          * This is because nfscl_procdoesntexist() never returns true for
1813          * the single open_owner so that the above doesn't ever call
1814          * nfscl_cleanup_common().
1815          */
1816         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1817                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1818                         if (nfscl_procdoesntexist(lp->nfsl_owner))
1819                                 nfscl_cleanup_common(clp, lp->nfsl_owner);
1820                 }
1821         }
1822         NFSUNLOCKCLSTATE();
1823         NFSPROCLISTUNLOCK();
1824 }
1825
1826 /*
1827  * Take the empty lock owner and move it to the local lhp list if the
1828  * associated process no longer exists.
1829  */
1830 static void
1831 nfscl_emptylockowner(struct nfscllockowner *lp,
1832     struct nfscllockownerfhhead *lhp)
1833 {
1834         struct nfscllockownerfh *lfhp, *mylfhp;
1835         struct nfscllockowner *nlp;
1836         int fnd_it;
1837
1838         /* If not a Posix lock owner, just return. */
1839         if ((lp->nfsl_lockflags & F_POSIX) == 0)
1840                 return;
1841
1842         fnd_it = 0;
1843         mylfhp = NULL;
1844         /*
1845          * First, search to see if this lock owner is already in the list.
1846          * If it is, then the associated process no longer exists.
1847          */
1848         SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1849                 if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1850                     !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1851                     lfhp->nfslfh_len))
1852                         mylfhp = lfhp;
1853                 LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1854                         if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1855                             NFSV4CL_LOCKNAMELEN))
1856                                 fnd_it = 1;
1857         }
1858         /* If not found, check if process still exists. */
1859         if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1860                 return;
1861
1862         /* Move the lock owner over to the local list. */
1863         if (mylfhp == NULL) {
1864                 mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1865                     M_NOWAIT);
1866                 if (mylfhp == NULL)
1867                         return;
1868                 mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1869                 NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1870                     mylfhp->nfslfh_len);
1871                 LIST_INIT(&mylfhp->nfslfh_lock);
1872                 SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1873         }
1874         LIST_REMOVE(lp, nfsl_list);
1875         LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1876 }
1877
1878 static int      fake_global;    /* Used to force visibility of MNTK_UNMOUNTF */
1879 /*
1880  * Called from nfs umount to free up the clientid.
1881  */
1882 void
1883 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1884 {
1885         struct nfsclclient *clp;
1886         struct ucred *cred;
1887         int igotlock;
1888
1889         /*
1890          * For the case that matters, this is the thread that set
1891          * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1892          * done to ensure that any thread executing nfscl_getcl() after
1893          * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1894          * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1895          * explanation, courtesy of Alan Cox.
1896          * What follows is a snippet from Alan Cox's email at:
1897          * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1898          * 
1899          * 1. Set MNTK_UNMOUNTF
1900          * 2. Acquire a standard FreeBSD mutex "m".
1901          * 3. Update some data structures.
1902          * 4. Release mutex "m".
1903          * 
1904          * Then, other threads that acquire "m" after step 4 has occurred will
1905          * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1906          * step 2 may or may not see MNTK_UNMOUNTF as set.
1907          */
1908         NFSLOCKCLSTATE();
1909         if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1910                 fake_global++;
1911                 NFSUNLOCKCLSTATE();
1912                 NFSLOCKCLSTATE();
1913         }
1914
1915         clp = nmp->nm_clp;
1916         if (clp != NULL) {
1917                 if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1918                         panic("nfscl umount");
1919         
1920                 /*
1921                  * First, handshake with the nfscl renew thread, to terminate
1922                  * it.
1923                  */
1924                 clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1925                 while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1926                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1927                             "nfsclumnt", hz);
1928         
1929                 /*
1930                  * Now, get the exclusive lock on the client state, so
1931                  * that no uses of the state are still in progress.
1932                  */
1933                 do {
1934                         igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1935                             NFSCLSTATEMUTEXPTR, NULL);
1936                 } while (!igotlock);
1937                 NFSUNLOCKCLSTATE();
1938         
1939                 /*
1940                  * Free up all the state. It will expire on the server, but
1941                  * maybe we should do a SetClientId/SetClientIdConfirm so
1942                  * the server throws it away?
1943                  */
1944                 LIST_REMOVE(clp, nfsc_list);
1945                 nfscl_delegreturnall(clp, p);
1946                 cred = newnfs_getcred();
1947                 if (NFSHASNFSV4N(nmp)) {
1948                         (void)nfsrpc_destroysession(nmp, clp, cred, p);
1949                         (void)nfsrpc_destroyclient(nmp, clp, cred, p);
1950                 } else
1951                         (void)nfsrpc_setclient(nmp, clp, 0, cred, p);
1952                 nfscl_cleanclient(clp);
1953                 nmp->nm_clp = NULL;
1954                 NFSFREECRED(cred);
1955                 free(clp, M_NFSCLCLIENT);
1956         } else
1957                 NFSUNLOCKCLSTATE();
1958 }
1959
1960 /*
1961  * This function is called when a server replies with NFSERR_STALECLIENTID
1962  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
1963  * doing Opens and Locks with reclaim. If these fail, it deletes the
1964  * corresponding state.
1965  */
1966 static void
1967 nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p)
1968 {
1969         struct nfsclowner *owp, *nowp;
1970         struct nfsclopen *op, *nop;
1971         struct nfscllockowner *lp, *nlp;
1972         struct nfscllock *lop, *nlop;
1973         struct nfscldeleg *dp, *ndp, *tdp;
1974         struct nfsmount *nmp;
1975         struct ucred *tcred;
1976         struct nfsclopenhead extra_open;
1977         struct nfscldeleghead extra_deleg;
1978         struct nfsreq *rep;
1979         u_int64_t len;
1980         u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
1981         int i, igotlock = 0, error, trycnt, firstlock;
1982         struct nfscllayout *lyp, *nlyp;
1983         bool recovered_one;
1984
1985         /*
1986          * First, lock the client structure, so everyone else will
1987          * block when trying to use state.
1988          */
1989         NFSLOCKCLSTATE();
1990         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
1991         do {
1992                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1993                     NFSCLSTATEMUTEXPTR, NULL);
1994         } while (!igotlock);
1995         NFSUNLOCKCLSTATE();
1996
1997         nmp = clp->nfsc_nmp;
1998         if (nmp == NULL)
1999                 panic("nfscl recover");
2000
2001         /*
2002          * For now, just get rid of all layouts. There may be a need
2003          * to do LayoutCommit Ops with reclaim == true later.
2004          */
2005         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2006                 nfscl_freelayout(lyp);
2007         TAILQ_INIT(&clp->nfsc_layout);
2008         for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2009                 LIST_INIT(&clp->nfsc_layouthash[i]);
2010
2011         trycnt = 5;
2012         do {
2013                 error = nfsrpc_setclient(nmp, clp, 1, cred, p);
2014         } while ((error == NFSERR_STALECLIENTID ||
2015              error == NFSERR_BADSESSION ||
2016              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2017         if (error) {
2018                 NFSLOCKCLSTATE();
2019                 clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2020                     NFSCLFLAGS_RECVRINPROG);
2021                 wakeup(&clp->nfsc_flags);
2022                 nfsv4_unlock(&clp->nfsc_lock, 0);
2023                 NFSUNLOCKCLSTATE();
2024                 return;
2025         }
2026         clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2027         clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2028
2029         /*
2030          * Mark requests already queued on the server, so that they don't
2031          * initiate another recovery cycle. Any requests already in the
2032          * queue that handle state information will have the old stale
2033          * clientid/stateid and will get a NFSERR_STALESTATEID,
2034          * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2035          * This will be translated to NFSERR_STALEDONTRECOVER when
2036          * R_DONTRECOVER is set.
2037          */
2038         NFSLOCKREQ();
2039         TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2040                 if (rep->r_nmp == nmp)
2041                         rep->r_flags |= R_DONTRECOVER;
2042         }
2043         NFSUNLOCKREQ();
2044
2045         /*
2046          * Now, mark all delegations "need reclaim".
2047          */
2048         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2049                 dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2050
2051         TAILQ_INIT(&extra_deleg);
2052         LIST_INIT(&extra_open);
2053         /*
2054          * Now traverse the state lists, doing Open and Lock Reclaims.
2055          */
2056         tcred = newnfs_getcred();
2057         recovered_one = false;
2058         owp = LIST_FIRST(&clp->nfsc_owner);
2059         while (owp != NULL) {
2060             nowp = LIST_NEXT(owp, nfsow_list);
2061             owp->nfsow_seqid = 0;
2062             op = LIST_FIRST(&owp->nfsow_open);
2063             while (op != NULL) {
2064                 nop = LIST_NEXT(op, nfso_list);
2065                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2066                     /* Search for a delegation to reclaim with the open */
2067                     TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2068                         if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2069                             continue;
2070                         if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2071                             mode = NFSV4OPEN_ACCESSWRITE;
2072                             delegtype = NFSV4OPEN_DELEGATEWRITE;
2073                         } else {
2074                             mode = NFSV4OPEN_ACCESSREAD;
2075                             delegtype = NFSV4OPEN_DELEGATEREAD;
2076                         }
2077                         if ((op->nfso_mode & mode) == mode &&
2078                             op->nfso_fhlen == dp->nfsdl_fhlen &&
2079                             !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2080                             break;
2081                     }
2082                     ndp = dp;
2083                     if (dp == NULL)
2084                         delegtype = NFSV4OPEN_DELEGATENONE;
2085                     newnfs_copycred(&op->nfso_cred, tcred);
2086                     error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2087                         op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2088                         op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2089                         tcred, p);
2090                     if (!error) {
2091                         recovered_one = true;
2092                         /* Handle any replied delegation */
2093                         if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2094                             || NFSMNT_RDONLY(nmp->nm_mountp))) {
2095                             if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2096                                 mode = NFSV4OPEN_ACCESSWRITE;
2097                             else
2098                                 mode = NFSV4OPEN_ACCESSREAD;
2099                             TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2100                                 if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2101                                     continue;
2102                                 if ((op->nfso_mode & mode) == mode &&
2103                                     op->nfso_fhlen == dp->nfsdl_fhlen &&
2104                                     !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2105                                     op->nfso_fhlen)) {
2106                                     dp->nfsdl_stateid = ndp->nfsdl_stateid;
2107                                     dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2108                                     dp->nfsdl_ace = ndp->nfsdl_ace;
2109                                     dp->nfsdl_change = ndp->nfsdl_change;
2110                                     dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2111                                     if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2112                                         dp->nfsdl_flags |= NFSCLDL_RECALL;
2113                                     free(ndp, M_NFSCLDELEG);
2114                                     ndp = NULL;
2115                                     break;
2116                                 }
2117                             }
2118                         }
2119                         if (ndp != NULL)
2120                             TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2121
2122                         /* and reclaim all byte range locks */
2123                         lp = LIST_FIRST(&op->nfso_lock);
2124                         while (lp != NULL) {
2125                             nlp = LIST_NEXT(lp, nfsl_list);
2126                             lp->nfsl_seqid = 0;
2127                             firstlock = 1;
2128                             lop = LIST_FIRST(&lp->nfsl_lock);
2129                             while (lop != NULL) {
2130                                 nlop = LIST_NEXT(lop, nfslo_list);
2131                                 if (lop->nfslo_end == NFS64BITSSET)
2132                                     len = NFS64BITSSET;
2133                                 else
2134                                     len = lop->nfslo_end - lop->nfslo_first;
2135                                 error = nfscl_trylock(nmp, NULL,
2136                                     op->nfso_fh, op->nfso_fhlen, lp,
2137                                     firstlock, 1, lop->nfslo_first, len,
2138                                     lop->nfslo_type, tcred, p);
2139                                 if (error != 0)
2140                                     nfscl_freelock(lop, 0);
2141                                 else
2142                                     firstlock = 0;
2143                                 lop = nlop;
2144                             }
2145                             /* If no locks, but a lockowner, just delete it. */
2146                             if (LIST_EMPTY(&lp->nfsl_lock))
2147                                 nfscl_freelockowner(lp, 0);
2148                             lp = nlp;
2149                         }
2150                     } else if (error == NFSERR_NOGRACE && !recovered_one &&
2151                         NFSHASNFSV4N(nmp)) {
2152                         /*
2153                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2154                          * actually end up here, since the client will do
2155                          * a recovery for NFSERR_BADSESSION, but will get
2156                          * an NFSERR_NOGRACE reply for the first "reclaim"
2157                          * attempt.
2158                          * So, call nfscl_expireclient() to recover the
2159                          * opens as best we can and then do a reclaim
2160                          * complete and return.
2161                          */
2162                         nfsrpc_reclaimcomplete(nmp, cred, p);
2163                         nfscl_expireclient(clp, nmp, tcred, p);
2164                         goto out;
2165                     }
2166                 }
2167                 if (error != 0 && error != NFSERR_BADSESSION)
2168                     nfscl_freeopen(op, 0);
2169                 op = nop;
2170             }
2171             owp = nowp;
2172         }
2173
2174         /*
2175          * Now, try and get any delegations not yet reclaimed by cobbling
2176          * together an appropriate open.
2177          */
2178         nowp = NULL;
2179         dp = TAILQ_FIRST(&clp->nfsc_deleg);
2180         while (dp != NULL) {
2181             ndp = TAILQ_NEXT(dp, nfsdl_list);
2182             if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2183                 if (nowp == NULL) {
2184                     nowp = malloc(
2185                         sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2186                     /*
2187                      * Name must be as long as the largest possible
2188                      * NFSV4CL_LOCKNAMELEN. 12 for now.
2189                      */
2190                     NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2191                         NFSV4CL_LOCKNAMELEN);
2192                     LIST_INIT(&nowp->nfsow_open);
2193                     nowp->nfsow_clp = clp;
2194                     nowp->nfsow_seqid = 0;
2195                     nowp->nfsow_defunct = 0;
2196                     nfscl_lockinit(&nowp->nfsow_rwlock);
2197                 }
2198                 nop = NULL;
2199                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2200                     nop = malloc(sizeof (struct nfsclopen) +
2201                         dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2202                     nop->nfso_own = nowp;
2203                     if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2204                         nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2205                         delegtype = NFSV4OPEN_DELEGATEWRITE;
2206                     } else {
2207                         nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2208                         delegtype = NFSV4OPEN_DELEGATEREAD;
2209                     }
2210                     nop->nfso_opencnt = 0;
2211                     nop->nfso_posixlock = 1;
2212                     nop->nfso_fhlen = dp->nfsdl_fhlen;
2213                     NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2214                     LIST_INIT(&nop->nfso_lock);
2215                     nop->nfso_stateid.seqid = 0;
2216                     nop->nfso_stateid.other[0] = 0;
2217                     nop->nfso_stateid.other[1] = 0;
2218                     nop->nfso_stateid.other[2] = 0;
2219                     newnfs_copycred(&dp->nfsdl_cred, tcred);
2220                     newnfs_copyincred(tcred, &nop->nfso_cred);
2221                     tdp = NULL;
2222                     error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2223                         nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2224                         nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2225                         delegtype, tcred, p);
2226                     if (tdp != NULL) {
2227                         if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2228                             mode = NFSV4OPEN_ACCESSWRITE;
2229                         else
2230                             mode = NFSV4OPEN_ACCESSREAD;
2231                         if ((nop->nfso_mode & mode) == mode &&
2232                             nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2233                             !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2234                             nop->nfso_fhlen)) {
2235                             dp->nfsdl_stateid = tdp->nfsdl_stateid;
2236                             dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2237                             dp->nfsdl_ace = tdp->nfsdl_ace;
2238                             dp->nfsdl_change = tdp->nfsdl_change;
2239                             dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2240                             if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2241                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2242                             free(tdp, M_NFSCLDELEG);
2243                         } else {
2244                             TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2245                         }
2246                     }
2247                 }
2248                 if (error) {
2249                     if (nop != NULL)
2250                         free(nop, M_NFSCLOPEN);
2251                     if (error == NFSERR_NOGRACE && !recovered_one &&
2252                         NFSHASNFSV4N(nmp)) {
2253                         /*
2254                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2255                          * actually end up here, since the client will do
2256                          * a recovery for NFSERR_BADSESSION, but will get
2257                          * an NFSERR_NOGRACE reply for the first "reclaim"
2258                          * attempt.
2259                          * So, call nfscl_expireclient() to recover the
2260                          * opens as best we can and then do a reclaim
2261                          * complete and return.
2262                          */
2263                         nfsrpc_reclaimcomplete(nmp, cred, p);
2264                         nfscl_expireclient(clp, nmp, tcred, p);
2265                         free(nowp, M_NFSCLOWNER);
2266                         goto out;
2267                     }
2268                     /*
2269                      * Couldn't reclaim it, so throw the state
2270                      * away. Ouch!!
2271                      */
2272                     nfscl_cleandeleg(dp);
2273                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2274                 } else {
2275                     recovered_one = true;
2276                     LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2277                 }
2278             }
2279             dp = ndp;
2280         }
2281
2282         /*
2283          * Now, get rid of extra Opens and Delegations.
2284          */
2285         LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2286                 do {
2287                         newnfs_copycred(&op->nfso_cred, tcred);
2288                         error = nfscl_tryclose(op, tcred, nmp, p);
2289                         if (error == NFSERR_GRACE)
2290                                 (void) nfs_catnap(PZERO, error, "nfsexcls");
2291                 } while (error == NFSERR_GRACE);
2292                 LIST_REMOVE(op, nfso_list);
2293                 free(op, M_NFSCLOPEN);
2294         }
2295         if (nowp != NULL)
2296                 free(nowp, M_NFSCLOWNER);
2297
2298         TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2299                 do {
2300                         newnfs_copycred(&dp->nfsdl_cred, tcred);
2301                         error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2302                         if (error == NFSERR_GRACE)
2303                                 (void) nfs_catnap(PZERO, error, "nfsexdlg");
2304                 } while (error == NFSERR_GRACE);
2305                 TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2306                 free(dp, M_NFSCLDELEG);
2307         }
2308
2309         /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2310         if (NFSHASNFSV4N(nmp))
2311                 (void)nfsrpc_reclaimcomplete(nmp, cred, p);
2312
2313 out:
2314         NFSLOCKCLSTATE();
2315         clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2316         wakeup(&clp->nfsc_flags);
2317         nfsv4_unlock(&clp->nfsc_lock, 0);
2318         NFSUNLOCKCLSTATE();
2319         NFSFREECRED(tcred);
2320 }
2321
2322 /*
2323  * This function is called when a server replies with NFSERR_EXPIRED.
2324  * It deletes all state for the client and does a fresh SetClientId/confirm.
2325  * XXX Someday it should post a signal to the process(es) that hold the
2326  * state, so they know that lock state has been lost.
2327  */
2328 int
2329 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2330 {
2331         struct nfsmount *nmp;
2332         struct ucred *cred;
2333         int igotlock = 0, error, trycnt;
2334
2335         /*
2336          * If the clientid has gone away or a new SetClientid has already
2337          * been done, just return ok.
2338          */
2339         if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2340                 return (0);
2341
2342         /*
2343          * First, lock the client structure, so everyone else will
2344          * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2345          * that only one thread does the work.
2346          */
2347         NFSLOCKCLSTATE();
2348         clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2349         do {
2350                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2351                     NFSCLSTATEMUTEXPTR, NULL);
2352         } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2353         if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2354                 if (igotlock)
2355                         nfsv4_unlock(&clp->nfsc_lock, 0);
2356                 NFSUNLOCKCLSTATE();
2357                 return (0);
2358         }
2359         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2360         NFSUNLOCKCLSTATE();
2361
2362         nmp = clp->nfsc_nmp;
2363         if (nmp == NULL)
2364                 panic("nfscl expired");
2365         cred = newnfs_getcred();
2366         trycnt = 5;
2367         do {
2368                 error = nfsrpc_setclient(nmp, clp, 0, cred, p);
2369         } while ((error == NFSERR_STALECLIENTID ||
2370              error == NFSERR_BADSESSION ||
2371              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2372         if (error) {
2373                 NFSLOCKCLSTATE();
2374                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2375         } else {
2376                 /*
2377                  * Expire the state for the client.
2378                  */
2379                 nfscl_expireclient(clp, nmp, cred, p);
2380                 NFSLOCKCLSTATE();
2381                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2382                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2383         }
2384         clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2385         wakeup(&clp->nfsc_flags);
2386         nfsv4_unlock(&clp->nfsc_lock, 0);
2387         NFSUNLOCKCLSTATE();
2388         NFSFREECRED(cred);
2389         return (error);
2390 }
2391
2392 /*
2393  * This function inserts a lock in the list after insert_lop.
2394  */
2395 static void
2396 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2397     struct nfscllock *insert_lop, int local)
2398 {
2399
2400         if ((struct nfscllockowner *)insert_lop == lp)
2401                 LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2402         else
2403                 LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2404         if (local)
2405                 nfsstatsv1.cllocallocks++;
2406         else
2407                 nfsstatsv1.cllocks++;
2408 }
2409
2410 /*
2411  * This function updates the locking for a lock owner and given file. It
2412  * maintains a list of lock ranges ordered on increasing file offset that
2413  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2414  * It always adds new_lop to the list and sometimes uses the one pointed
2415  * at by other_lopp.
2416  * Returns 1 if the locks were modified, 0 otherwise.
2417  */
2418 static int
2419 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2420     struct nfscllock **other_lopp, int local)
2421 {
2422         struct nfscllock *new_lop = *new_lopp;
2423         struct nfscllock *lop, *tlop, *ilop;
2424         struct nfscllock *other_lop;
2425         int unlock = 0, modified = 0;
2426         u_int64_t tmp;
2427
2428         /*
2429          * Work down the list until the lock is merged.
2430          */
2431         if (new_lop->nfslo_type == F_UNLCK)
2432                 unlock = 1;
2433         ilop = (struct nfscllock *)lp;
2434         lop = LIST_FIRST(&lp->nfsl_lock);
2435         while (lop != NULL) {
2436             /*
2437              * Only check locks for this file that aren't before the start of
2438              * new lock's range.
2439              */
2440             if (lop->nfslo_end >= new_lop->nfslo_first) {
2441                 if (new_lop->nfslo_end < lop->nfslo_first) {
2442                     /*
2443                      * If the new lock ends before the start of the
2444                      * current lock's range, no merge, just insert
2445                      * the new lock.
2446                      */
2447                     break;
2448                 }
2449                 if (new_lop->nfslo_type == lop->nfslo_type ||
2450                     (new_lop->nfslo_first <= lop->nfslo_first &&
2451                      new_lop->nfslo_end >= lop->nfslo_end)) {
2452                     /*
2453                      * This lock can be absorbed by the new lock/unlock.
2454                      * This happens when it covers the entire range
2455                      * of the old lock or is contiguous
2456                      * with the old lock and is of the same type or an
2457                      * unlock.
2458                      */
2459                     if (new_lop->nfslo_type != lop->nfslo_type ||
2460                         new_lop->nfslo_first != lop->nfslo_first ||
2461                         new_lop->nfslo_end != lop->nfslo_end)
2462                         modified = 1;
2463                     if (lop->nfslo_first < new_lop->nfslo_first)
2464                         new_lop->nfslo_first = lop->nfslo_first;
2465                     if (lop->nfslo_end > new_lop->nfslo_end)
2466                         new_lop->nfslo_end = lop->nfslo_end;
2467                     tlop = lop;
2468                     lop = LIST_NEXT(lop, nfslo_list);
2469                     nfscl_freelock(tlop, local);
2470                     continue;
2471                 }
2472
2473                 /*
2474                  * All these cases are for contiguous locks that are not the
2475                  * same type, so they can't be merged.
2476                  */
2477                 if (new_lop->nfslo_first <= lop->nfslo_first) {
2478                     /*
2479                      * This case is where the new lock overlaps with the
2480                      * first part of the old lock. Move the start of the
2481                      * old lock to just past the end of the new lock. The
2482                      * new lock will be inserted in front of the old, since
2483                      * ilop hasn't been updated. (We are done now.)
2484                      */
2485                     if (lop->nfslo_first != new_lop->nfslo_end) {
2486                         lop->nfslo_first = new_lop->nfslo_end;
2487                         modified = 1;
2488                     }
2489                     break;
2490                 }
2491                 if (new_lop->nfslo_end >= lop->nfslo_end) {
2492                     /*
2493                      * This case is where the new lock overlaps with the
2494                      * end of the old lock's range. Move the old lock's
2495                      * end to just before the new lock's first and insert
2496                      * the new lock after the old lock.
2497                      * Might not be done yet, since the new lock could
2498                      * overlap further locks with higher ranges.
2499                      */
2500                     if (lop->nfslo_end != new_lop->nfslo_first) {
2501                         lop->nfslo_end = new_lop->nfslo_first;
2502                         modified = 1;
2503                     }
2504                     ilop = lop;
2505                     lop = LIST_NEXT(lop, nfslo_list);
2506                     continue;
2507                 }
2508                 /*
2509                  * The final case is where the new lock's range is in the
2510                  * middle of the current lock's and splits the current lock
2511                  * up. Use *other_lopp to handle the second part of the
2512                  * split old lock range. (We are done now.)
2513                  * For unlock, we use new_lop as other_lop and tmp, since
2514                  * other_lop and new_lop are the same for this case.
2515                  * We noted the unlock case above, so we don't need
2516                  * new_lop->nfslo_type any longer.
2517                  */
2518                 tmp = new_lop->nfslo_first;
2519                 if (unlock) {
2520                     other_lop = new_lop;
2521                     *new_lopp = NULL;
2522                 } else {
2523                     other_lop = *other_lopp;
2524                     *other_lopp = NULL;
2525                 }
2526                 other_lop->nfslo_first = new_lop->nfslo_end;
2527                 other_lop->nfslo_end = lop->nfslo_end;
2528                 other_lop->nfslo_type = lop->nfslo_type;
2529                 lop->nfslo_end = tmp;
2530                 nfscl_insertlock(lp, other_lop, lop, local);
2531                 ilop = lop;
2532                 modified = 1;
2533                 break;
2534             }
2535             ilop = lop;
2536             lop = LIST_NEXT(lop, nfslo_list);
2537             if (lop == NULL)
2538                 break;
2539         }
2540
2541         /*
2542          * Insert the new lock in the list at the appropriate place.
2543          */
2544         if (!unlock) {
2545                 nfscl_insertlock(lp, new_lop, ilop, local);
2546                 *new_lopp = NULL;
2547                 modified = 1;
2548         }
2549         return (modified);
2550 }
2551
2552 /*
2553  * This function must be run as a kernel thread.
2554  * It does Renew Ops and recovery, when required.
2555  */
2556 void
2557 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2558 {
2559         struct nfsclowner *owp, *nowp;
2560         struct nfsclopen *op;
2561         struct nfscllockowner *lp, *nlp;
2562         struct nfscldeleghead dh;
2563         struct nfscldeleg *dp, *ndp;
2564         struct ucred *cred;
2565         u_int32_t clidrev;
2566         int error, cbpathdown, islept, igotlock, ret, clearok;
2567         uint32_t recover_done_time = 0;
2568         time_t mytime;
2569         static time_t prevsec = 0;
2570         struct nfscllockownerfh *lfhp, *nlfhp;
2571         struct nfscllockownerfhhead lfh;
2572         struct nfscllayout *lyp, *nlyp;
2573         struct nfscldevinfo *dip, *ndip;
2574         struct nfscllayouthead rlh;
2575         struct nfsclrecalllayout *recallp;
2576         struct nfsclds *dsp;
2577         struct mount *mp;
2578         vnode_t vp;
2579
2580         cred = newnfs_getcred();
2581         NFSLOCKCLSTATE();
2582         clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2583         mp = clp->nfsc_nmp->nm_mountp;
2584         NFSUNLOCKCLSTATE();
2585         for(;;) {
2586                 newnfs_setroot(cred);
2587                 cbpathdown = 0;
2588                 if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2589                         /*
2590                          * Only allow one recover within 1/2 of the lease
2591                          * duration (nfsc_renew).
2592                          */
2593                         if (recover_done_time < NFSD_MONOSEC) {
2594                                 recover_done_time = NFSD_MONOSEC +
2595                                     clp->nfsc_renew;
2596                                 NFSCL_DEBUG(1, "Doing recovery..\n");
2597                                 nfscl_recover(clp, cred, p);
2598                         } else {
2599                                 NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n",
2600                                     recover_done_time, (intmax_t)NFSD_MONOSEC);
2601                                 NFSLOCKCLSTATE();
2602                                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2603                                 NFSUNLOCKCLSTATE();
2604                         }
2605                 }
2606                 if (clp->nfsc_expire <= NFSD_MONOSEC &&
2607                     (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2608                         clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2609                         clidrev = clp->nfsc_clientidrev;
2610                         error = nfsrpc_renew(clp, NULL, cred, p);
2611                         if (error == NFSERR_CBPATHDOWN)
2612                             cbpathdown = 1;
2613                         else if (error == NFSERR_STALECLIENTID ||
2614                             error == NFSERR_BADSESSION) {
2615                             NFSLOCKCLSTATE();
2616                             clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2617                             NFSUNLOCKCLSTATE();
2618                         } else if (error == NFSERR_EXPIRED)
2619                             (void) nfscl_hasexpired(clp, clidrev, p);
2620                 }
2621
2622 checkdsrenew:
2623                 if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2624                         /* Do renews for any DS sessions. */
2625                         NFSLOCKMNT(clp->nfsc_nmp);
2626                         /* Skip first entry, since the MDS is handled above. */
2627                         dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2628                         if (dsp != NULL)
2629                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2630                         while (dsp != NULL) {
2631                                 if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2632                                     dsp->nfsclds_sess.nfsess_defunct == 0) {
2633                                         dsp->nfsclds_expire = NFSD_MONOSEC +
2634                                             clp->nfsc_renew;
2635                                         NFSUNLOCKMNT(clp->nfsc_nmp);
2636                                         (void)nfsrpc_renew(clp, dsp, cred, p);
2637                                         goto checkdsrenew;
2638                                 }
2639                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2640                         }
2641                         NFSUNLOCKMNT(clp->nfsc_nmp);
2642                 }
2643
2644                 TAILQ_INIT(&dh);
2645                 NFSLOCKCLSTATE();
2646                 if (cbpathdown)
2647                         /* It's a Total Recall! */
2648                         nfscl_totalrecall(clp);
2649
2650                 /*
2651                  * Now, handle defunct owners.
2652                  */
2653                 LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2654                         if (LIST_EMPTY(&owp->nfsow_open)) {
2655                                 if (owp->nfsow_defunct != 0)
2656                                         nfscl_freeopenowner(owp, 0);
2657                         }
2658                 }
2659
2660                 /*
2661                  * Do the recall on any delegations. To avoid trouble, always
2662                  * come back up here after having slept.
2663                  */
2664                 igotlock = 0;
2665 tryagain:
2666                 dp = TAILQ_FIRST(&clp->nfsc_deleg);
2667                 while (dp != NULL) {
2668                         ndp = TAILQ_NEXT(dp, nfsdl_list);
2669                         if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2670                                 /*
2671                                  * Wait for outstanding I/O ops to be done.
2672                                  */
2673                                 if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2674                                     if (igotlock) {
2675                                         nfsv4_unlock(&clp->nfsc_lock, 0);
2676                                         igotlock = 0;
2677                                     }
2678                                     dp->nfsdl_rwlock.nfslock_lock |=
2679                                         NFSV4LOCK_WANTED;
2680                                     msleep(&dp->nfsdl_rwlock,
2681                                         NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2682                                         5 * hz);
2683                                     if (NFSCL_FORCEDISM(mp))
2684                                         goto terminate;
2685                                     goto tryagain;
2686                                 }
2687                                 while (!igotlock) {
2688                                     igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2689                                         &islept, NFSCLSTATEMUTEXPTR, mp);
2690                                     if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2691                                         goto terminate;
2692                                     if (islept)
2693                                         goto tryagain;
2694                                 }
2695                                 NFSUNLOCKCLSTATE();
2696                                 newnfs_copycred(&dp->nfsdl_cred, cred);
2697                                 ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2698                                     NULL, cred, p, 1, &vp);
2699                                 if (!ret) {
2700                                     nfscl_cleandeleg(dp);
2701                                     TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2702                                         nfsdl_list);
2703                                     LIST_REMOVE(dp, nfsdl_hash);
2704                                     TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2705                                     nfscl_delegcnt--;
2706                                     nfsstatsv1.cldelegates--;
2707                                 }
2708                                 NFSLOCKCLSTATE();
2709                                 /*
2710                                  * The nfsc_lock must be released before doing
2711                                  * vrele(), since it might call nfs_inactive().
2712                                  * For the unlikely case where the vnode failed
2713                                  * to be acquired by nfscl_recalldeleg(), a
2714                                  * VOP_RECLAIM() should be in progress and it
2715                                  * will return the delegation.
2716                                  */
2717                                 nfsv4_unlock(&clp->nfsc_lock, 0);
2718                                 igotlock = 0;
2719                                 if (vp != NULL) {
2720                                         NFSUNLOCKCLSTATE();
2721                                         vrele(vp);
2722                                         NFSLOCKCLSTATE();
2723                                 }
2724                                 goto tryagain;
2725                         }
2726                         dp = ndp;
2727                 }
2728
2729                 /*
2730                  * Clear out old delegations, if we are above the high water
2731                  * mark. Only clear out ones with no state related to them.
2732                  * The tailq list is in LRU order.
2733                  */
2734                 dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2735                 while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2736                     ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2737                     if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2738                         dp->nfsdl_rwlock.nfslock_lock == 0 &&
2739                         dp->nfsdl_timestamp < NFSD_MONOSEC &&
2740                         (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2741                           NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2742                         clearok = 1;
2743                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2744                             op = LIST_FIRST(&owp->nfsow_open);
2745                             if (op != NULL) {
2746                                 clearok = 0;
2747                                 break;
2748                             }
2749                         }
2750                         if (clearok) {
2751                             LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2752                                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
2753                                     clearok = 0;
2754                                     break;
2755                                 }
2756                             }
2757                         }
2758                         if (clearok) {
2759                             TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2760                             LIST_REMOVE(dp, nfsdl_hash);
2761                             TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2762                             nfscl_delegcnt--;
2763                             nfsstatsv1.cldelegates--;
2764                         }
2765                     }
2766                     dp = ndp;
2767                 }
2768                 if (igotlock)
2769                         nfsv4_unlock(&clp->nfsc_lock, 0);
2770
2771                 /*
2772                  * Do the recall on any layouts. To avoid trouble, always
2773                  * come back up here after having slept.
2774                  */
2775                 TAILQ_INIT(&rlh);
2776 tryagain2:
2777                 TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2778                         if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2779                                 /*
2780                                  * Wait for outstanding I/O ops to be done.
2781                                  */
2782                                 if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2783                                     (lyp->nfsly_lock.nfslock_lock &
2784                                      NFSV4LOCK_LOCK) != 0) {
2785                                         lyp->nfsly_lock.nfslock_lock |=
2786                                             NFSV4LOCK_WANTED;
2787                                         msleep(&lyp->nfsly_lock.nfslock_lock,
2788                                             NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2789                                             5 * hz);
2790                                         if (NFSCL_FORCEDISM(mp))
2791                                             goto terminate;
2792                                         goto tryagain2;
2793                                 }
2794                                 /* Move the layout to the recall list. */
2795                                 TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2796                                     nfsly_list);
2797                                 LIST_REMOVE(lyp, nfsly_hash);
2798                                 TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2799
2800                                 /* Handle any layout commits. */
2801                                 if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2802                                     (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2803                                         lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2804                                         NFSUNLOCKCLSTATE();
2805                                         NFSCL_DEBUG(3, "do layoutcommit\n");
2806                                         nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2807                                             cred, p);
2808                                         NFSLOCKCLSTATE();
2809                                         goto tryagain2;
2810                                 }
2811                         }
2812                 }
2813
2814                 /* Now, look for stale layouts. */
2815                 lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2816                 while (lyp != NULL) {
2817                         nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2818                         if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2819                             (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2820                             lyp->nfsly_lock.nfslock_usecnt == 0 &&
2821                             lyp->nfsly_lock.nfslock_lock == 0) {
2822                                 NFSCL_DEBUG(4, "ret stale lay=%d\n",
2823                                     nfscl_layoutcnt);
2824                                 recallp = malloc(sizeof(*recallp),
2825                                     M_NFSLAYRECALL, M_NOWAIT);
2826                                 if (recallp == NULL)
2827                                         break;
2828                                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2829                                     lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2830                                     lyp->nfsly_stateid.seqid, 0, 0, NULL,
2831                                     recallp);
2832                         }
2833                         lyp = nlyp;
2834                 }
2835
2836                 /*
2837                  * Free up any unreferenced device info structures.
2838                  */
2839                 LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2840                         if (dip->nfsdi_layoutrefs == 0 &&
2841                             dip->nfsdi_refcnt == 0) {
2842                                 NFSCL_DEBUG(4, "freeing devinfo\n");
2843                                 LIST_REMOVE(dip, nfsdi_list);
2844                                 nfscl_freedevinfo(dip);
2845                         }
2846                 }
2847                 NFSUNLOCKCLSTATE();
2848
2849                 /* Do layout return(s), as required. */
2850                 TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2851                         TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2852                         NFSCL_DEBUG(4, "ret layout\n");
2853                         nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2854                         nfscl_freelayout(lyp);
2855                 }
2856
2857                 /*
2858                  * Delegreturn any delegations cleaned out or recalled.
2859                  */
2860                 TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2861                         newnfs_copycred(&dp->nfsdl_cred, cred);
2862                         (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2863                         TAILQ_REMOVE(&dh, dp, nfsdl_list);
2864                         free(dp, M_NFSCLDELEG);
2865                 }
2866
2867                 SLIST_INIT(&lfh);
2868                 /*
2869                  * Call nfscl_cleanupkext() once per second to check for
2870                  * open/lock owners where the process has exited.
2871                  */
2872                 mytime = NFSD_MONOSEC;
2873                 if (prevsec != mytime) {
2874                         prevsec = mytime;
2875                         nfscl_cleanupkext(clp, &lfh);
2876                 }
2877
2878                 /*
2879                  * Do a ReleaseLockOwner for all lock owners where the
2880                  * associated process no longer exists, as found by
2881                  * nfscl_cleanupkext().
2882                  */
2883                 newnfs_setroot(cred);
2884                 SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2885                         LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2886                             nlp) {
2887                                 (void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2888                                     lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2889                                     p);
2890                                 nfscl_freelockowner(lp, 0);
2891                         }
2892                         free(lfhp, M_TEMP);
2893                 }
2894                 SLIST_INIT(&lfh);
2895
2896                 NFSLOCKCLSTATE();
2897                 if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2898                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2899                             hz);
2900 terminate:
2901                 if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2902                         clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2903                         NFSUNLOCKCLSTATE();
2904                         NFSFREECRED(cred);
2905                         wakeup((caddr_t)clp);
2906                         return;
2907                 }
2908                 NFSUNLOCKCLSTATE();
2909         }
2910 }
2911
2912 /*
2913  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2914  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2915  */
2916 void
2917 nfscl_initiate_recovery(struct nfsclclient *clp)
2918 {
2919
2920         if (clp == NULL)
2921                 return;
2922         NFSLOCKCLSTATE();
2923         clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2924         NFSUNLOCKCLSTATE();
2925         wakeup((caddr_t)clp);
2926 }
2927
2928 /*
2929  * Dump out the state stuff for debugging.
2930  */
2931 void
2932 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2933     int lockowner, int locks)
2934 {
2935         struct nfsclclient *clp;
2936         struct nfsclowner *owp;
2937         struct nfsclopen *op;
2938         struct nfscllockowner *lp;
2939         struct nfscllock *lop;
2940         struct nfscldeleg *dp;
2941
2942         clp = nmp->nm_clp;
2943         if (clp == NULL) {
2944                 printf("nfscl dumpstate NULL clp\n");
2945                 return;
2946         }
2947         NFSLOCKCLSTATE();
2948         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2949           LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2950             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2951                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2952                     owp->nfsow_owner[0], owp->nfsow_owner[1],
2953                     owp->nfsow_owner[2], owp->nfsow_owner[3],
2954                     owp->nfsow_seqid);
2955             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2956                 if (opens)
2957                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2958                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2959                         op->nfso_stateid.other[2], op->nfso_opencnt,
2960                         op->nfso_fh[12]);
2961                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2962                     if (lockowner)
2963                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
2964                             lp->nfsl_owner[0], lp->nfsl_owner[1],
2965                             lp->nfsl_owner[2], lp->nfsl_owner[3],
2966                             lp->nfsl_seqid,
2967                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
2968                             lp->nfsl_stateid.other[2]);
2969                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
2970                         if (locks)
2971 #ifdef __FreeBSD__
2972                             printf("lck typ=%d fst=%ju end=%ju\n",
2973                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
2974                                 (intmax_t)lop->nfslo_end);
2975 #else
2976                             printf("lck typ=%d fst=%qd end=%qd\n",
2977                                 lop->nfslo_type, lop->nfslo_first,
2978                                 lop->nfslo_end);
2979 #endif
2980                     }
2981                 }
2982             }
2983           }
2984         }
2985         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
2986             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2987                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2988                     owp->nfsow_owner[0], owp->nfsow_owner[1],
2989                     owp->nfsow_owner[2], owp->nfsow_owner[3],
2990                     owp->nfsow_seqid);
2991             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2992                 if (opens)
2993                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2994                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2995                         op->nfso_stateid.other[2], op->nfso_opencnt,
2996                         op->nfso_fh[12]);
2997                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2998                     if (lockowner)
2999                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3000                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3001                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3002                             lp->nfsl_seqid,
3003                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3004                             lp->nfsl_stateid.other[2]);
3005                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3006                         if (locks)
3007 #ifdef __FreeBSD__
3008                             printf("lck typ=%d fst=%ju end=%ju\n",
3009                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3010                                 (intmax_t)lop->nfslo_end);
3011 #else
3012                             printf("lck typ=%d fst=%qd end=%qd\n",
3013                                 lop->nfslo_type, lop->nfslo_first,
3014                                 lop->nfslo_end);
3015 #endif
3016                     }
3017                 }
3018             }
3019         }
3020         NFSUNLOCKCLSTATE();
3021 }
3022
3023 /*
3024  * Check for duplicate open owners and opens.
3025  * (Only used as a diagnostic aid.)
3026  */
3027 void
3028 nfscl_dupopen(vnode_t vp, int dupopens)
3029 {
3030         struct nfsclclient *clp;
3031         struct nfsclowner *owp, *owp2;
3032         struct nfsclopen *op, *op2;
3033         struct nfsfh *nfhp;
3034
3035         clp = VFSTONFS(vnode_mount(vp))->nm_clp;
3036         if (clp == NULL) {
3037                 printf("nfscl dupopen NULL clp\n");
3038                 return;
3039         }
3040         nfhp = VTONFS(vp)->n_fhp;
3041         NFSLOCKCLSTATE();
3042
3043         /*
3044          * First, search for duplicate owners.
3045          * These should never happen!
3046          */
3047         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3048             LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3049                 if (owp != owp2 &&
3050                     !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3051                     NFSV4CL_LOCKNAMELEN)) {
3052                         NFSUNLOCKCLSTATE();
3053                         printf("DUP OWNER\n");
3054                         nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0);
3055                         return;
3056                 }
3057             }
3058         }
3059
3060         /*
3061          * Now, search for duplicate stateids.
3062          * These shouldn't happen, either.
3063          */
3064         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3065             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3066                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3067                     LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3068                         if (op != op2 &&
3069                             (op->nfso_stateid.other[0] != 0 ||
3070                              op->nfso_stateid.other[1] != 0 ||
3071                              op->nfso_stateid.other[2] != 0) &&
3072                             op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3073                             op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3074                             op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3075                             NFSUNLOCKCLSTATE();
3076                             printf("DUP STATEID\n");
3077                             nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0,
3078                                 0);
3079                             return;
3080                         }
3081                     }
3082                 }
3083             }
3084         }
3085
3086         /*
3087          * Now search for duplicate opens.
3088          * Duplicate opens for the same owner
3089          * should never occur. Other duplicates are
3090          * possible and are checked for if "dupopens"
3091          * is true.
3092          */
3093         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3094             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3095                 if (nfhp->nfh_len == op2->nfso_fhlen &&
3096                     !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3097                     LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3098                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3099                             if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3100                                 !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3101                                 (!NFSBCMP(op->nfso_own->nfsow_owner,
3102                                  op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3103                                  dupopens)) {
3104                                 if (!NFSBCMP(op->nfso_own->nfsow_owner,
3105                                     op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3106                                     NFSUNLOCKCLSTATE();
3107                                     printf("BADDUP OPEN\n");
3108                                 } else {
3109                                     NFSUNLOCKCLSTATE();
3110                                     printf("DUP OPEN\n");
3111                                 }
3112                                 nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1,
3113                                     0, 0);
3114                                 return;
3115                             }
3116                         }
3117                     }
3118                 }
3119             }
3120         }
3121         NFSUNLOCKCLSTATE();
3122 }
3123
3124 /*
3125  * During close, find an open that needs to be dereferenced and
3126  * dereference it. If there are no more opens for this file,
3127  * log a message to that effect.
3128  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3129  * on the file's vnode.
3130  * This is the safe way, since it is difficult to identify
3131  * which open the close is for and I/O can be performed after the
3132  * close(2) system call when a file is mmap'd.
3133  * If it returns 0 for success, there will be a referenced
3134  * clp returned via clpp.
3135  */
3136 int
3137 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3138 {
3139         struct nfsclclient *clp;
3140         struct nfsclowner *owp;
3141         struct nfsclopen *op;
3142         struct nfscldeleg *dp;
3143         struct nfsfh *nfhp;
3144         int error, notdecr;
3145
3146         error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, true, &clp);
3147         if (error)
3148                 return (error);
3149         *clpp = clp;
3150
3151         nfhp = VTONFS(vp)->n_fhp;
3152         notdecr = 1;
3153         NFSLOCKCLSTATE();
3154         /*
3155          * First, look for one under a delegation that was locally issued
3156          * and just decrement the opencnt for it. Since all my Opens against
3157          * the server are DENY_NONE, I don't see a problem with hanging
3158          * onto them. (It is much easier to use one of the extant Opens
3159          * that I already have on the server when a Delegation is recalled
3160          * than to do fresh Opens.) Someday, I might need to rethink this, but.
3161          */
3162         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3163         if (dp != NULL) {
3164                 LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3165                         op = LIST_FIRST(&owp->nfsow_open);
3166                         if (op != NULL) {
3167                                 /*
3168                                  * Since a delegation is for a file, there
3169                                  * should never be more than one open for
3170                                  * each openowner.
3171                                  */
3172                                 if (LIST_NEXT(op, nfso_list) != NULL)
3173                                         panic("nfscdeleg opens");
3174                                 if (notdecr && op->nfso_opencnt > 0) {
3175                                         notdecr = 0;
3176                                         op->nfso_opencnt--;
3177                                         break;
3178                                 }
3179                         }
3180                 }
3181         }
3182
3183         /* Now process the opens against the server. */
3184         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3185                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3186                         if (op->nfso_fhlen == nfhp->nfh_len &&
3187                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3188                             nfhp->nfh_len)) {
3189                                 /* Found an open, decrement cnt if possible */
3190                                 if (notdecr && op->nfso_opencnt > 0) {
3191                                         notdecr = 0;
3192                                         op->nfso_opencnt--;
3193                                 }
3194                                 /*
3195                                  * There are more opens, so just return.
3196                                  */
3197                                 if (op->nfso_opencnt > 0) {
3198                                         NFSUNLOCKCLSTATE();
3199                                         return (0);
3200                                 }
3201                         }
3202                 }
3203         }
3204         NFSUNLOCKCLSTATE();
3205         if (notdecr)
3206                 printf("nfscl: never fnd open\n");
3207         return (0);
3208 }
3209
3210 int
3211 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3212 {
3213         struct nfsclclient *clp;
3214         struct nfsclowner *owp, *nowp;
3215         struct nfsclopen *op;
3216         struct nfscldeleg *dp;
3217         struct nfsfh *nfhp;
3218         struct nfsclrecalllayout *recallp;
3219         int error;
3220
3221         error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, true, &clp);
3222         if (error)
3223                 return (error);
3224         *clpp = clp;
3225
3226         nfhp = VTONFS(vp)->n_fhp;
3227         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3228         NFSLOCKCLSTATE();
3229         /*
3230          * First get rid of the local Open structures, which should be no
3231          * longer in use.
3232          */
3233         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3234         if (dp != NULL) {
3235                 LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3236                         op = LIST_FIRST(&owp->nfsow_open);
3237                         if (op != NULL) {
3238                                 KASSERT((op->nfso_opencnt == 0),
3239                                     ("nfscl: bad open cnt on deleg"));
3240                                 nfscl_freeopen(op, 1);
3241                         }
3242                         nfscl_freeopenowner(owp, 1);
3243                 }
3244         }
3245
3246         /* Return any layouts marked return on close. */
3247         nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3248
3249         /* Now process the opens against the server. */
3250 lookformore:
3251         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3252                 op = LIST_FIRST(&owp->nfsow_open);
3253                 while (op != NULL) {
3254                         if (op->nfso_fhlen == nfhp->nfh_len &&
3255                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3256                             nfhp->nfh_len)) {
3257                                 /* Found an open, close it. */
3258                                 KASSERT((op->nfso_opencnt == 0),
3259                                     ("nfscl: bad open cnt on server"));
3260                                 NFSUNLOCKCLSTATE();
3261                                 nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op,
3262                                     p);
3263                                 NFSLOCKCLSTATE();
3264                                 goto lookformore;
3265                         }
3266                         op = LIST_NEXT(op, nfso_list);
3267                 }
3268         }
3269         NFSUNLOCKCLSTATE();
3270         /*
3271          * recallp has been set NULL by nfscl_retoncloselayout() if it was
3272          * used by the function, but calling free() with a NULL pointer is ok.
3273          */
3274         free(recallp, M_NFSLAYRECALL);
3275         return (0);
3276 }
3277
3278 /*
3279  * Return all delegations on this client.
3280  * (Must be called with client sleep lock.)
3281  */
3282 static void
3283 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3284 {
3285         struct nfscldeleg *dp, *ndp;
3286         struct ucred *cred;
3287
3288         cred = newnfs_getcred();
3289         TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3290                 nfscl_cleandeleg(dp);
3291                 (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3292                 nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3293         }
3294         NFSFREECRED(cred);
3295 }
3296
3297 /*
3298  * Return any delegation for this vp.
3299  */
3300 void
3301 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3302 {
3303         struct nfsclclient *clp;
3304         struct nfscldeleg *dp;
3305         struct ucred *cred;
3306         struct nfsnode *np;
3307
3308         np = VTONFS(vp);
3309         cred = newnfs_getcred();
3310         dp = NULL;
3311         NFSLOCKCLSTATE();
3312         clp = VFSTONFS(vp->v_mount)->nm_clp;
3313         if (clp != NULL)
3314                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3315                     np->n_fhp->nfh_len);
3316         if (dp != NULL) {
3317                 nfscl_cleandeleg(dp);
3318                 nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3319                 NFSUNLOCKCLSTATE();
3320                 newnfs_copycred(&dp->nfsdl_cred, cred);
3321                 nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3322                 free(dp, M_NFSCLDELEG);
3323         } else
3324                 NFSUNLOCKCLSTATE();
3325         NFSFREECRED(cred);
3326 }
3327
3328 /*
3329  * Do a callback RPC.
3330  */
3331 void
3332 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3333 {
3334         int clist, gotseq_ok, i, j, k, op, rcalls;
3335         u_int32_t *tl;
3336         struct nfsclclient *clp;
3337         struct nfscldeleg *dp = NULL;
3338         int numops, taglen = -1, error = 0, trunc __unused;
3339         u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3340         u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3341         vnode_t vp = NULL;
3342         struct nfsnode *np;
3343         struct vattr va;
3344         struct nfsfh *nfhp;
3345         mount_t mp;
3346         nfsattrbit_t attrbits, rattrbits;
3347         nfsv4stateid_t stateid;
3348         uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3349         uint8_t sessionid[NFSX_V4SESSIONID];
3350         struct mbuf *rep;
3351         struct nfscllayout *lyp;
3352         uint64_t filesid[2], len, off;
3353         int changed, gotone, laytype, recalltype;
3354         uint32_t iomode;
3355         struct nfsclrecalllayout *recallp = NULL;
3356         struct nfsclsession *tsep;
3357
3358         gotseq_ok = 0;
3359         nfsrvd_rephead(nd);
3360         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3361         taglen = fxdr_unsigned(int, *tl);
3362         if (taglen < 0) {
3363                 error = EBADRPC;
3364                 goto nfsmout;
3365         }
3366         if (taglen <= NFSV4_SMALLSTR)
3367                 tagstr = tag;
3368         else
3369                 tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3370         error = nfsrv_mtostr(nd, tagstr, taglen);
3371         if (error) {
3372                 if (taglen > NFSV4_SMALLSTR)
3373                         free(tagstr, M_TEMP);
3374                 taglen = -1;
3375                 goto nfsmout;
3376         }
3377         (void) nfsm_strtom(nd, tag, taglen);
3378         if (taglen > NFSV4_SMALLSTR) {
3379                 free(tagstr, M_TEMP);
3380         }
3381         NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3382         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3383         minorvers = fxdr_unsigned(u_int32_t, *tl++);
3384         if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION)
3385                 nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3386         cbident = fxdr_unsigned(u_int32_t, *tl++);
3387         if (nd->nd_repstat)
3388                 numops = 0;
3389         else
3390                 numops = fxdr_unsigned(int, *tl);
3391         /*
3392          * Loop around doing the sub ops.
3393          */
3394         for (i = 0; i < numops; i++) {
3395                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3396                 NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3397                 *repp++ = *tl;
3398                 op = fxdr_unsigned(int, *tl);
3399                 if (op < NFSV4OP_CBGETATTR ||
3400                    (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3401                    (op > NFSV4OP_CBNOTIFYDEVID &&
3402                     minorvers == NFSV41_MINORVERSION)) {
3403                     nd->nd_repstat = NFSERR_OPILLEGAL;
3404                     *repp = nfscl_errmap(nd, minorvers);
3405                     retops++;
3406                     break;
3407                 }
3408                 nd->nd_procnum = op;
3409                 if (op < NFSV41_CBNOPS)
3410                         nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3411                 switch (op) {
3412                 case NFSV4OP_CBGETATTR:
3413                         NFSCL_DEBUG(4, "cbgetattr\n");
3414                         mp = NULL;
3415                         vp = NULL;
3416                         error = nfsm_getfh(nd, &nfhp);
3417                         if (!error)
3418                                 error = nfsrv_getattrbits(nd, &attrbits,
3419                                     NULL, NULL);
3420                         if (error == 0 && i == 0 &&
3421                             minorvers != NFSV4_MINORVERSION)
3422                                 error = NFSERR_OPNOTINSESS;
3423                         if (!error) {
3424                                 mp = nfscl_getmnt(minorvers, sessionid, cbident,
3425                                     &clp);
3426                                 if (mp == NULL)
3427                                         error = NFSERR_SERVERFAULT;
3428                         }
3429                         if (!error) {
3430                                 error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3431                                     nfhp->nfh_len, p, &np);
3432                                 if (!error)
3433                                         vp = NFSTOV(np);
3434                         }
3435                         if (!error) {
3436                                 NFSZERO_ATTRBIT(&rattrbits);
3437                                 NFSLOCKCLSTATE();
3438                                 dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3439                                     nfhp->nfh_len);
3440                                 if (dp != NULL) {
3441                                         if (NFSISSET_ATTRBIT(&attrbits,
3442                                             NFSATTRBIT_SIZE)) {
3443                                                 if (vp != NULL)
3444                                                         va.va_size = np->n_size;
3445                                                 else
3446                                                         va.va_size =
3447                                                             dp->nfsdl_size;
3448                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3449                                                     NFSATTRBIT_SIZE);
3450                                         }
3451                                         if (NFSISSET_ATTRBIT(&attrbits,
3452                                             NFSATTRBIT_CHANGE)) {
3453                                                 va.va_filerev =
3454                                                     dp->nfsdl_change;
3455                                                 if (vp == NULL ||
3456                                                     (np->n_flag & NDELEGMOD))
3457                                                         va.va_filerev++;
3458                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3459                                                     NFSATTRBIT_CHANGE);
3460                                         }
3461                                 } else
3462                                         error = NFSERR_SERVERFAULT;
3463                                 NFSUNLOCKCLSTATE();
3464                         }
3465                         if (vp != NULL)
3466                                 vrele(vp);
3467                         if (mp != NULL)
3468                                 vfs_unbusy(mp);
3469                         if (nfhp != NULL)
3470                                 free(nfhp, M_NFSFH);
3471                         if (!error)
3472                                 (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3473                                     NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3474                                     (uint64_t)0, NULL);
3475                         break;
3476                 case NFSV4OP_CBRECALL:
3477                         NFSCL_DEBUG(4, "cbrecall\n");
3478                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3479                             NFSX_UNSIGNED);
3480                         stateid.seqid = *tl++;
3481                         NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3482                             NFSX_STATEIDOTHER);
3483                         tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3484                         trunc = fxdr_unsigned(int, *tl);
3485                         error = nfsm_getfh(nd, &nfhp);
3486                         if (error == 0 && i == 0 &&
3487                             minorvers != NFSV4_MINORVERSION)
3488                                 error = NFSERR_OPNOTINSESS;
3489                         if (!error) {
3490                                 NFSLOCKCLSTATE();
3491                                 if (minorvers == NFSV4_MINORVERSION)
3492                                         clp = nfscl_getclnt(cbident);
3493                                 else
3494                                         clp = nfscl_getclntsess(sessionid);
3495                                 if (clp != NULL) {
3496                                         dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3497                                             nfhp->nfh_len);
3498                                         if (dp != NULL && (dp->nfsdl_flags &
3499                                             NFSCLDL_DELEGRET) == 0) {
3500                                                 dp->nfsdl_flags |=
3501                                                     NFSCLDL_RECALL;
3502                                                 wakeup((caddr_t)clp);
3503                                         }
3504                                 } else {
3505                                         error = NFSERR_SERVERFAULT;
3506                                 }
3507                                 NFSUNLOCKCLSTATE();
3508                         }
3509                         if (nfhp != NULL)
3510                                 free(nfhp, M_NFSFH);
3511                         break;
3512                 case NFSV4OP_CBLAYOUTRECALL:
3513                         NFSCL_DEBUG(4, "cblayrec\n");
3514                         nfhp = NULL;
3515                         NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3516                         laytype = fxdr_unsigned(int, *tl++);
3517                         iomode = fxdr_unsigned(uint32_t, *tl++);
3518                         if (newnfs_true == *tl++)
3519                                 changed = 1;
3520                         else
3521                                 changed = 0;
3522                         recalltype = fxdr_unsigned(int, *tl);
3523                         NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3524                             laytype, iomode, changed, recalltype);
3525                         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3526                             M_WAITOK);
3527                         if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3528                             laytype != NFSLAYOUT_FLEXFILE)
3529                                 error = NFSERR_NOMATCHLAYOUT;
3530                         else if (recalltype == NFSLAYOUTRETURN_FILE) {
3531                                 error = nfsm_getfh(nd, &nfhp);
3532                                 NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3533                                 if (error != 0)
3534                                         goto nfsmout;
3535                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3536                                     NFSX_STATEID);
3537                                 off = fxdr_hyper(tl); tl += 2;
3538                                 len = fxdr_hyper(tl); tl += 2;
3539                                 stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3540                                 NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3541                                 if (minorvers == NFSV4_MINORVERSION)
3542                                         error = NFSERR_NOTSUPP;
3543                                 else if (i == 0)
3544                                         error = NFSERR_OPNOTINSESS;
3545                                 NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3546                                     (uintmax_t)off, (uintmax_t)len,
3547                                     stateid.seqid, error);
3548                                 if (error == 0) {
3549                                         NFSLOCKCLSTATE();
3550                                         clp = nfscl_getclntsess(sessionid);
3551                                         NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3552                                         if (clp != NULL) {
3553                                                 lyp = nfscl_findlayout(clp,
3554                                                     nfhp->nfh_fh,
3555                                                     nfhp->nfh_len);
3556                                                 NFSCL_DEBUG(4, "cblyp=%p\n",
3557                                                     lyp);
3558                                                 if (lyp != NULL &&
3559                                                     (lyp->nfsly_flags &
3560                                                      (NFSLY_FILES |
3561                                                       NFSLY_FLEXFILE)) != 0 &&
3562                                                     !NFSBCMP(stateid.other,
3563                                                     lyp->nfsly_stateid.other,
3564                                                     NFSX_STATEIDOTHER)) {
3565                                                         error =
3566                                                             nfscl_layoutrecall(
3567                                                             recalltype,
3568                                                             lyp, iomode, off,
3569                                                             len, stateid.seqid,
3570                                                             0, 0, NULL,
3571                                                             recallp);
3572                                                         if (error == 0 &&
3573                                                             stateid.seqid >
3574                                                             lyp->nfsly_stateid.seqid)
3575                                                                 lyp->nfsly_stateid.seqid =
3576                                                                     stateid.seqid;
3577                                                         recallp = NULL;
3578                                                         wakeup(clp);
3579                                                         NFSCL_DEBUG(4,
3580                                                             "aft layrcal=%d "
3581                                                             "layseqid=%d\n",
3582                                                             error,
3583                                                             lyp->nfsly_stateid.seqid);
3584                                                 } else
3585                                                         error =
3586                                                           NFSERR_NOMATCHLAYOUT;
3587                                         } else
3588                                                 error = NFSERR_NOMATCHLAYOUT;
3589                                         NFSUNLOCKCLSTATE();
3590                                 }
3591                                 free(nfhp, M_NFSFH);
3592                         } else if (recalltype == NFSLAYOUTRETURN_FSID) {
3593                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3594                                 filesid[0] = fxdr_hyper(tl); tl += 2;
3595                                 filesid[1] = fxdr_hyper(tl); tl += 2;
3596                                 gotone = 0;
3597                                 NFSLOCKCLSTATE();
3598                                 clp = nfscl_getclntsess(sessionid);
3599                                 if (clp != NULL) {
3600                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3601                                             nfsly_list) {
3602                                                 if (lyp->nfsly_filesid[0] ==
3603                                                     filesid[0] &&
3604                                                     lyp->nfsly_filesid[1] ==
3605                                                     filesid[1]) {
3606                                                         error =
3607                                                             nfscl_layoutrecall(
3608                                                             recalltype,
3609                                                             lyp, iomode, 0,
3610                                                             UINT64_MAX,
3611                                                             lyp->nfsly_stateid.seqid,
3612                                                             0, 0, NULL,
3613                                                             recallp);
3614                                                         recallp = NULL;
3615                                                         gotone = 1;
3616                                                 }
3617                                         }
3618                                         if (gotone != 0)
3619                                                 wakeup(clp);
3620                                         else
3621                                                 error = NFSERR_NOMATCHLAYOUT;
3622                                 } else
3623                                         error = NFSERR_NOMATCHLAYOUT;
3624                                 NFSUNLOCKCLSTATE();
3625                         } else if (recalltype == NFSLAYOUTRETURN_ALL) {
3626                                 gotone = 0;
3627                                 NFSLOCKCLSTATE();
3628                                 clp = nfscl_getclntsess(sessionid);
3629                                 if (clp != NULL) {
3630                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3631                                             nfsly_list) {
3632                                                 error = nfscl_layoutrecall(
3633                                                     recalltype, lyp, iomode, 0,
3634                                                     UINT64_MAX,
3635                                                     lyp->nfsly_stateid.seqid,
3636                                                     0, 0, NULL, recallp);
3637                                                 recallp = NULL;
3638                                                 gotone = 1;
3639                                         }
3640                                         if (gotone != 0)
3641                                                 wakeup(clp);
3642                                         else
3643                                                 error = NFSERR_NOMATCHLAYOUT;
3644                                 } else
3645                                         error = NFSERR_NOMATCHLAYOUT;
3646                                 NFSUNLOCKCLSTATE();
3647                         } else
3648                                 error = NFSERR_NOMATCHLAYOUT;
3649                         if (recallp != NULL) {
3650                                 free(recallp, M_NFSLAYRECALL);
3651                                 recallp = NULL;
3652                         }
3653                         break;
3654                 case NFSV4OP_CBSEQUENCE:
3655                         NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3656                             5 * NFSX_UNSIGNED);
3657                         bcopy(tl, sessionid, NFSX_V4SESSIONID);
3658                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3659                         seqid = fxdr_unsigned(uint32_t, *tl++);
3660                         slotid = fxdr_unsigned(uint32_t, *tl++);
3661                         highslot = fxdr_unsigned(uint32_t, *tl++);
3662                         cachethis = *tl++;
3663                         /* Throw away the referring call stuff. */
3664                         clist = fxdr_unsigned(int, *tl);
3665                         for (j = 0; j < clist; j++) {
3666                                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3667                                     NFSX_UNSIGNED);
3668                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3669                                 rcalls = fxdr_unsigned(int, *tl);
3670                                 for (k = 0; k < rcalls; k++) {
3671                                         NFSM_DISSECT(tl, uint32_t *,
3672                                             2 * NFSX_UNSIGNED);
3673                                 }
3674                         }
3675                         NFSLOCKCLSTATE();
3676                         if (i == 0) {
3677                                 clp = nfscl_getclntsess(sessionid);
3678                                 if (clp == NULL)
3679                                         error = NFSERR_SERVERFAULT;
3680                         } else
3681                                 error = NFSERR_SEQUENCEPOS;
3682                         if (error == 0) {
3683                                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3684                                 error = nfsv4_seqsession(seqid, slotid,
3685                                     highslot, tsep->nfsess_cbslots, &rep,
3686                                     tsep->nfsess_backslots);
3687                         }
3688                         NFSUNLOCKCLSTATE();
3689                         if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3690                                 gotseq_ok = 1;
3691                                 if (rep != NULL) {
3692                                         /*
3693                                          * Handle a reply for a retried
3694                                          * callback.  The reply will be
3695                                          * re-inserted in the session cache
3696                                          * by the nfsv4_seqsess_cacherep() call
3697                                          * after out:
3698                                          */
3699                                         KASSERT(error == NFSERR_REPLYFROMCACHE,
3700                                             ("cbsequence: non-NULL rep"));
3701                                         NFSCL_DEBUG(4, "Got cbretry\n");
3702                                         m_freem(nd->nd_mreq);
3703                                         nd->nd_mreq = rep;
3704                                         rep = NULL;
3705                                         goto out;
3706                                 }
3707                                 NFSM_BUILD(tl, uint32_t *,
3708                                     NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3709                                 bcopy(sessionid, tl, NFSX_V4SESSIONID);
3710                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3711                                 *tl++ = txdr_unsigned(seqid);
3712                                 *tl++ = txdr_unsigned(slotid);
3713                                 *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3714                                 *tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3715                         }
3716                         break;
3717                 default:
3718                         if (i == 0 && minorvers == NFSV41_MINORVERSION)
3719                                 error = NFSERR_OPNOTINSESS;
3720                         else {
3721                                 NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3722                                 error = NFSERR_NOTSUPP;
3723                         }
3724                         break;
3725                 }
3726                 if (error) {
3727                         if (error == EBADRPC || error == NFSERR_BADXDR) {
3728                                 nd->nd_repstat = NFSERR_BADXDR;
3729                         } else {
3730                                 nd->nd_repstat = error;
3731                         }
3732                         error = 0;
3733                 }
3734                 retops++;
3735                 if (nd->nd_repstat) {
3736                         *repp = nfscl_errmap(nd, minorvers);
3737                         break;
3738                 } else
3739                         *repp = 0;      /* NFS4_OK */
3740         }
3741 nfsmout:
3742         if (recallp != NULL)
3743                 free(recallp, M_NFSLAYRECALL);
3744         if (error) {
3745                 if (error == EBADRPC || error == NFSERR_BADXDR)
3746                         nd->nd_repstat = NFSERR_BADXDR;
3747                 else
3748                         printf("nfsv4 comperr1=%d\n", error);
3749         }
3750         if (taglen == -1) {
3751                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3752                 *tl++ = 0;
3753                 *tl = 0;
3754         } else {
3755                 *retopsp = txdr_unsigned(retops);
3756         }
3757         *nd->nd_errp = nfscl_errmap(nd, minorvers);
3758 out:
3759         if (gotseq_ok != 0) {
3760                 rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3761                 NFSLOCKCLSTATE();
3762                 clp = nfscl_getclntsess(sessionid);
3763                 if (clp != NULL) {
3764                         tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3765                         nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3766                             NFSERR_OK, &rep);
3767                         NFSUNLOCKCLSTATE();
3768                 } else {
3769                         NFSUNLOCKCLSTATE();
3770                         m_freem(rep);
3771                 }
3772         }
3773 }
3774
3775 /*
3776  * Generate the next cbident value. Basically just increment a static value
3777  * and then check that it isn't already in the list, if it has wrapped around.
3778  */
3779 static u_int32_t
3780 nfscl_nextcbident(void)
3781 {
3782         struct nfsclclient *clp;
3783         int matched;
3784         static u_int32_t nextcbident = 0;
3785         static int haswrapped = 0;
3786
3787         nextcbident++;
3788         if (nextcbident == 0)
3789                 haswrapped = 1;
3790         if (haswrapped) {
3791                 /*
3792                  * Search the clientid list for one already using this cbident.
3793                  */
3794                 do {
3795                         matched = 0;
3796                         NFSLOCKCLSTATE();
3797                         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3798                                 if (clp->nfsc_cbident == nextcbident) {
3799                                         matched = 1;
3800                                         break;
3801                                 }
3802                         }
3803                         NFSUNLOCKCLSTATE();
3804                         if (matched == 1)
3805                                 nextcbident++;
3806                 } while (matched);
3807         }
3808         return (nextcbident);
3809 }
3810
3811 /*
3812  * Get the mount point related to a given cbident or session and busy it.
3813  */
3814 static mount_t
3815 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3816     struct nfsclclient **clpp)
3817 {
3818         struct nfsclclient *clp;
3819         mount_t mp;
3820         int error;
3821         struct nfsclsession *tsep;
3822
3823         *clpp = NULL;
3824         NFSLOCKCLSTATE();
3825         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3826                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3827                 if (minorvers == NFSV4_MINORVERSION) {
3828                         if (clp->nfsc_cbident == cbident)
3829                                 break;
3830                 } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3831                     NFSX_V4SESSIONID))
3832                         break;
3833         }
3834         if (clp == NULL) {
3835                 NFSUNLOCKCLSTATE();
3836                 return (NULL);
3837         }
3838         mp = clp->nfsc_nmp->nm_mountp;
3839         vfs_ref(mp);
3840         NFSUNLOCKCLSTATE();
3841         error = vfs_busy(mp, 0);
3842         vfs_rel(mp);
3843         if (error != 0)
3844                 return (NULL);
3845         *clpp = clp;
3846         return (mp);
3847 }
3848
3849 /*
3850  * Get the clientid pointer related to a given cbident.
3851  */
3852 static struct nfsclclient *
3853 nfscl_getclnt(u_int32_t cbident)
3854 {
3855         struct nfsclclient *clp;
3856
3857         LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3858                 if (clp->nfsc_cbident == cbident)
3859                         break;
3860         return (clp);
3861 }
3862
3863 /*
3864  * Get the clientid pointer related to a given sessionid.
3865  */
3866 static struct nfsclclient *
3867 nfscl_getclntsess(uint8_t *sessionid)
3868 {
3869         struct nfsclclient *clp;
3870         struct nfsclsession *tsep;
3871
3872         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3873                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3874                 if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3875                     NFSX_V4SESSIONID))
3876                         break;
3877         }
3878         return (clp);
3879 }
3880
3881 /*
3882  * Search for a lock conflict locally on the client. A conflict occurs if
3883  * - not same owner and overlapping byte range and at least one of them is
3884  *   a write lock or this is an unlock.
3885  */
3886 static int
3887 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3888     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3889     struct nfscllock **lopp)
3890 {
3891         struct nfsclowner *owp;
3892         struct nfsclopen *op;
3893         int ret;
3894
3895         if (dp != NULL) {
3896                 ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3897                 if (ret)
3898                         return (ret);
3899         }
3900         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3901                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3902                         if (op->nfso_fhlen == fhlen &&
3903                             !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3904                                 ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3905                                     own, lopp);
3906                                 if (ret)
3907                                         return (ret);
3908                         }
3909                 }
3910         }
3911         return (0);
3912 }
3913
3914 static int
3915 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3916     u_int8_t *own, struct nfscllock **lopp)
3917 {
3918         struct nfscllockowner *lp;
3919         struct nfscllock *lop;
3920
3921         LIST_FOREACH(lp, lhp, nfsl_list) {
3922                 if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3923                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3924                                 if (lop->nfslo_first >= nlop->nfslo_end)
3925                                         break;
3926                                 if (lop->nfslo_end <= nlop->nfslo_first)
3927                                         continue;
3928                                 if (lop->nfslo_type == F_WRLCK ||
3929                                     nlop->nfslo_type == F_WRLCK ||
3930                                     nlop->nfslo_type == F_UNLCK) {
3931                                         if (lopp != NULL)
3932                                                 *lopp = lop;
3933                                         return (NFSERR_DENIED);
3934                                 }
3935                         }
3936                 }
3937         }
3938         return (0);
3939 }
3940
3941 /*
3942  * Check for a local conflicting lock.
3943  */
3944 int
3945 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
3946     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
3947 {
3948         struct nfscllock *lop, nlck;
3949         struct nfscldeleg *dp;
3950         struct nfsnode *np;
3951         u_int8_t own[NFSV4CL_LOCKNAMELEN];
3952         int error;
3953
3954         nlck.nfslo_type = fl->l_type;
3955         nlck.nfslo_first = off;
3956         if (len == NFS64BITSSET) {
3957                 nlck.nfslo_end = NFS64BITSSET;
3958         } else {
3959                 nlck.nfslo_end = off + len;
3960                 if (nlck.nfslo_end <= nlck.nfslo_first)
3961                         return (NFSERR_INVAL);
3962         }
3963         np = VTONFS(vp);
3964         nfscl_filllockowner(id, own, flags);
3965         NFSLOCKCLSTATE();
3966         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
3967         error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
3968             &nlck, own, dp, &lop);
3969         if (error != 0) {
3970                 fl->l_whence = SEEK_SET;
3971                 fl->l_start = lop->nfslo_first;
3972                 if (lop->nfslo_end == NFS64BITSSET)
3973                         fl->l_len = 0;
3974                 else
3975                         fl->l_len = lop->nfslo_end - lop->nfslo_first;
3976                 fl->l_pid = (pid_t)0;
3977                 fl->l_type = lop->nfslo_type;
3978                 error = -1;                     /* no RPC required */
3979         } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
3980             fl->l_type == F_RDLCK)) {
3981                 /*
3982                  * The delegation ensures that there isn't a conflicting
3983                  * lock on the server, so return -1 to indicate an RPC
3984                  * isn't required.
3985                  */
3986                 fl->l_type = F_UNLCK;
3987                 error = -1;
3988         }
3989         NFSUNLOCKCLSTATE();
3990         return (error);
3991 }
3992
3993 /*
3994  * Handle Recall of a delegation.
3995  * The clp must be exclusive locked when this is called.
3996  */
3997 static int
3998 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
3999     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4000     int called_from_renewthread, vnode_t *vpp)
4001 {
4002         struct nfsclowner *owp, *lowp, *nowp;
4003         struct nfsclopen *op, *lop;
4004         struct nfscllockowner *lp;
4005         struct nfscllock *lckp;
4006         struct nfsnode *np;
4007         int error = 0, ret;
4008
4009         if (vp == NULL) {
4010                 KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4011                 *vpp = NULL;
4012                 /*
4013                  * First, get a vnode for the file. This is needed to do RPCs.
4014                  */
4015                 ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4016                     dp->nfsdl_fhlen, p, &np);
4017                 if (ret) {
4018                         /*
4019                          * File isn't open, so nothing to move over to the
4020                          * server.
4021                          */
4022                         return (0);
4023                 }
4024                 vp = NFSTOV(np);
4025                 *vpp = vp;
4026         } else {
4027                 np = VTONFS(vp);
4028         }
4029         dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4030
4031         /*
4032          * Ok, if it's a write delegation, flush data to the server, so
4033          * that close/open consistency is retained.
4034          */
4035         ret = 0;
4036         NFSLOCKNODE(np);
4037         if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4038                 np->n_flag |= NDELEGRECALL;
4039                 NFSUNLOCKNODE(np);
4040                 ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4041                 NFSLOCKNODE(np);
4042                 np->n_flag &= ~NDELEGRECALL;
4043         }
4044         NFSINVALATTRCACHE(np);
4045         NFSUNLOCKNODE(np);
4046         if (ret == EIO && called_from_renewthread != 0) {
4047                 /*
4048                  * If the flush failed with EIO for the renew thread,
4049                  * return now, so that the dirty buffer will be flushed
4050                  * later.
4051                  */
4052                 return (ret);
4053         }
4054
4055         /*
4056          * Now, for each openowner with opens issued locally, move them
4057          * over to state against the server.
4058          */
4059         LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4060                 lop = LIST_FIRST(&lowp->nfsow_open);
4061                 if (lop != NULL) {
4062                         if (LIST_NEXT(lop, nfso_list) != NULL)
4063                                 panic("nfsdlg mult opens");
4064                         /*
4065                          * Look for the same openowner against the server.
4066                          */
4067                         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4068                                 if (!NFSBCMP(lowp->nfsow_owner,
4069                                     owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4070                                         newnfs_copycred(&dp->nfsdl_cred, cred);
4071                                         ret = nfscl_moveopen(vp, clp, nmp, lop,
4072                                             owp, dp, cred, p);
4073                                         if (ret == NFSERR_STALECLIENTID ||
4074                                             ret == NFSERR_STALEDONTRECOVER ||
4075                                             ret == NFSERR_BADSESSION)
4076                                                 return (ret);
4077                                         if (ret) {
4078                                                 nfscl_freeopen(lop, 1);
4079                                                 if (!error)
4080                                                         error = ret;
4081                                         }
4082                                         break;
4083                                 }
4084                         }
4085
4086                         /*
4087                          * If no openowner found, create one and get an open
4088                          * for it.
4089                          */
4090                         if (owp == NULL) {
4091                                 nowp = malloc(
4092                                     sizeof (struct nfsclowner), M_NFSCLOWNER,
4093                                     M_WAITOK);
4094                                 nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
4095                                     NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4096                                     dp->nfsdl_fhlen, NULL, NULL);
4097                                 newnfs_copycred(&dp->nfsdl_cred, cred);
4098                                 ret = nfscl_moveopen(vp, clp, nmp, lop,
4099                                     owp, dp, cred, p);
4100                                 if (ret) {
4101                                         nfscl_freeopenowner(owp, 0);
4102                                         if (ret == NFSERR_STALECLIENTID ||
4103                                             ret == NFSERR_STALEDONTRECOVER ||
4104                                             ret == NFSERR_BADSESSION)
4105                                                 return (ret);
4106                                         if (ret) {
4107                                                 nfscl_freeopen(lop, 1);
4108                                                 if (!error)
4109                                                         error = ret;
4110                                         }
4111                                 }
4112                         }
4113                 }
4114         }
4115
4116         /*
4117          * Now, get byte range locks for any locks done locally.
4118          */
4119         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4120                 LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4121                         newnfs_copycred(&dp->nfsdl_cred, cred);
4122                         ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4123                         if (ret == NFSERR_STALESTATEID ||
4124                             ret == NFSERR_STALEDONTRECOVER ||
4125                             ret == NFSERR_STALECLIENTID ||
4126                             ret == NFSERR_BADSESSION)
4127                                 return (ret);
4128                         if (ret && !error)
4129                                 error = ret;
4130                 }
4131         }
4132         return (error);
4133 }
4134
4135 /*
4136  * Move a locally issued open over to an owner on the state list.
4137  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4138  * returns with it unlocked.
4139  */
4140 static int
4141 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4142     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4143     struct ucred *cred, NFSPROC_T *p)
4144 {
4145         struct nfsclopen *op, *nop;
4146         struct nfscldeleg *ndp;
4147         struct nfsnode *np;
4148         int error = 0, newone;
4149
4150         /*
4151          * First, look for an appropriate open, If found, just increment the
4152          * opencnt in it.
4153          */
4154         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4155                 if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4156                     op->nfso_fhlen == lop->nfso_fhlen &&
4157                     !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4158                         op->nfso_opencnt += lop->nfso_opencnt;
4159                         nfscl_freeopen(lop, 1);
4160                         return (0);
4161                 }
4162         }
4163
4164         /* No appropriate open, so we have to do one against the server. */
4165         np = VTONFS(vp);
4166         nop = malloc(sizeof (struct nfsclopen) +
4167             lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4168         newone = 0;
4169         nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4170             lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4171         ndp = dp;
4172         error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4173             lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4174             NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4175         if (error) {
4176                 if (newone)
4177                         nfscl_freeopen(op, 0);
4178         } else {
4179                 op->nfso_mode |= lop->nfso_mode;
4180                 op->nfso_opencnt += lop->nfso_opencnt;
4181                 nfscl_freeopen(lop, 1);
4182         }
4183         if (nop != NULL)
4184                 free(nop, M_NFSCLOPEN);
4185         if (ndp != NULL) {
4186                 /*
4187                  * What should I do with the returned delegation, since the
4188                  * delegation is being recalled? For now, just printf and
4189                  * throw it away.
4190                  */
4191                 printf("Moveopen returned deleg\n");
4192                 free(ndp, M_NFSCLDELEG);
4193         }
4194         return (error);
4195 }
4196
4197 /*
4198  * Recall all delegations on this client.
4199  */
4200 static void
4201 nfscl_totalrecall(struct nfsclclient *clp)
4202 {
4203         struct nfscldeleg *dp;
4204
4205         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4206                 if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4207                         dp->nfsdl_flags |= NFSCLDL_RECALL;
4208         }
4209 }
4210
4211 /*
4212  * Relock byte ranges. Called for delegation recall and state expiry.
4213  */
4214 static int
4215 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4216     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4217     NFSPROC_T *p)
4218 {
4219         struct nfscllockowner *nlp;
4220         struct nfsfh *nfhp;
4221         u_int64_t off, len;
4222         int error, newone, donelocally;
4223
4224         off = lop->nfslo_first;
4225         len = lop->nfslo_end - lop->nfslo_first;
4226         error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4227             clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4228             lp->nfsl_openowner, &nlp, &newone, &donelocally);
4229         if (error || donelocally)
4230                 return (error);
4231         nfhp = VTONFS(vp)->n_fhp;
4232         error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4233             nfhp->nfh_len, nlp, newone, 0, off,
4234             len, lop->nfslo_type, cred, p);
4235         if (error)
4236                 nfscl_freelockowner(nlp, 0);
4237         return (error);
4238 }
4239
4240 /*
4241  * Called to re-open a file. Basically get a vnode for the file handle
4242  * and then call nfsrpc_openrpc() to do the rest.
4243  */
4244 static int
4245 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4246     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4247     struct ucred *cred, NFSPROC_T *p)
4248 {
4249         struct nfsnode *np;
4250         vnode_t vp;
4251         int error;
4252
4253         error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4254         if (error)
4255                 return (error);
4256         vp = NFSTOV(np);
4257         if (np->n_v4 != NULL) {
4258                 error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4259                     np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4260                     NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4261                     cred, p);
4262         } else {
4263                 error = EINVAL;
4264         }
4265         vrele(vp);
4266         return (error);
4267 }
4268
4269 /*
4270  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4271  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4272  * fail.
4273  */
4274 static int
4275 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4276     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4277     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4278     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4279 {
4280         int error;
4281
4282         do {
4283                 error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4284                     mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4285                     0, 0);
4286                 if (error == NFSERR_DELAY)
4287                         (void) nfs_catnap(PZERO, error, "nfstryop");
4288         } while (error == NFSERR_DELAY);
4289         if (error == EAUTH || error == EACCES) {
4290                 /* Try again using system credentials */
4291                 newnfs_setroot(cred);
4292                 do {
4293                     error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4294                         newfhlen, mode, op, name, namelen, ndpp, reclaim,
4295                         delegtype, cred, p, 1, 0);
4296                     if (error == NFSERR_DELAY)
4297                         (void) nfs_catnap(PZERO, error, "nfstryop");
4298                 } while (error == NFSERR_DELAY);
4299         }
4300         return (error);
4301 }
4302
4303 /*
4304  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4305  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4306  * cred don't work.
4307  */
4308 static int
4309 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4310     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4311     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4312 {
4313         struct nfsrv_descript nfsd, *nd = &nfsd;
4314         int error;
4315
4316         do {
4317                 error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4318                     reclaim, off, len, type, cred, p, 0);
4319                 if (!error && nd->nd_repstat == NFSERR_DELAY)
4320                         (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4321                             "nfstrylck");
4322         } while (!error && nd->nd_repstat == NFSERR_DELAY);
4323         if (!error)
4324                 error = nd->nd_repstat;
4325         if (error == EAUTH || error == EACCES) {
4326                 /* Try again using root credentials */
4327                 newnfs_setroot(cred);
4328                 do {
4329                         error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4330                             newone, reclaim, off, len, type, cred, p, 1);
4331                         if (!error && nd->nd_repstat == NFSERR_DELAY)
4332                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4333                                     "nfstrylck");
4334                 } while (!error && nd->nd_repstat == NFSERR_DELAY);
4335                 if (!error)
4336                         error = nd->nd_repstat;
4337         }
4338         return (error);
4339 }
4340
4341 /*
4342  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4343  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4344  * credentials fail.
4345  */
4346 static int
4347 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4348     struct nfsmount *nmp, NFSPROC_T *p)
4349 {
4350         int error;
4351
4352         do {
4353                 error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4354                 if (error == NFSERR_DELAY)
4355                         (void) nfs_catnap(PZERO, error, "nfstrydp");
4356         } while (error == NFSERR_DELAY);
4357         if (error == EAUTH || error == EACCES) {
4358                 /* Try again using system credentials */
4359                 newnfs_setroot(cred);
4360                 do {
4361                         error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4362                         if (error == NFSERR_DELAY)
4363                                 (void) nfs_catnap(PZERO, error, "nfstrydp");
4364                 } while (error == NFSERR_DELAY);
4365         }
4366         return (error);
4367 }
4368
4369 /*
4370  * Try a close against the server. Just call nfsrpc_closerpc(),
4371  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4372  * credentials fail.
4373  */
4374 int
4375 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4376     struct nfsmount *nmp, NFSPROC_T *p)
4377 {
4378         struct nfsrv_descript nfsd, *nd = &nfsd;
4379         int error;
4380
4381         do {
4382                 error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4383                 if (error == NFSERR_DELAY)
4384                         (void) nfs_catnap(PZERO, error, "nfstrycl");
4385         } while (error == NFSERR_DELAY);
4386         if (error == EAUTH || error == EACCES) {
4387                 /* Try again using system credentials */
4388                 newnfs_setroot(cred);
4389                 do {
4390                         error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4391                         if (error == NFSERR_DELAY)
4392                                 (void) nfs_catnap(PZERO, error, "nfstrycl");
4393                 } while (error == NFSERR_DELAY);
4394         }
4395         return (error);
4396 }
4397
4398 /*
4399  * Decide if a delegation on a file permits close without flushing writes
4400  * to the server. This might be a big performance win in some environments.
4401  * (Not useful until the client does caching on local stable storage.)
4402  */
4403 int
4404 nfscl_mustflush(vnode_t vp)
4405 {
4406         struct nfsclclient *clp;
4407         struct nfscldeleg *dp;
4408         struct nfsnode *np;
4409         struct nfsmount *nmp;
4410
4411         np = VTONFS(vp);
4412         nmp = VFSTONFS(vnode_mount(vp));
4413         if (!NFSHASNFSV4(nmp))
4414                 return (1);
4415         NFSLOCKCLSTATE();
4416         clp = nfscl_findcl(nmp);
4417         if (clp == NULL) {
4418                 NFSUNLOCKCLSTATE();
4419                 return (1);
4420         }
4421         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4422         if (dp != NULL && (dp->nfsdl_flags &
4423             (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4424              NFSCLDL_WRITE &&
4425             (dp->nfsdl_sizelimit >= np->n_size ||
4426              !NFSHASSTRICT3530(nmp))) {
4427                 NFSUNLOCKCLSTATE();
4428                 return (0);
4429         }
4430         NFSUNLOCKCLSTATE();
4431         return (1);
4432 }
4433
4434 /*
4435  * See if a (write) delegation exists for this file.
4436  */
4437 int
4438 nfscl_nodeleg(vnode_t vp, int writedeleg)
4439 {
4440         struct nfsclclient *clp;
4441         struct nfscldeleg *dp;
4442         struct nfsnode *np;
4443         struct nfsmount *nmp;
4444
4445         np = VTONFS(vp);
4446         nmp = VFSTONFS(vnode_mount(vp));
4447         if (!NFSHASNFSV4(nmp))
4448                 return (1);
4449         NFSLOCKCLSTATE();
4450         clp = nfscl_findcl(nmp);
4451         if (clp == NULL) {
4452                 NFSUNLOCKCLSTATE();
4453                 return (1);
4454         }
4455         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4456         if (dp != NULL &&
4457             (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4458             (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4459              NFSCLDL_WRITE)) {
4460                 NFSUNLOCKCLSTATE();
4461                 return (0);
4462         }
4463         NFSUNLOCKCLSTATE();
4464         return (1);
4465 }
4466
4467 /*
4468  * Look for an associated delegation that should be DelegReturned.
4469  */
4470 int
4471 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4472 {
4473         struct nfsclclient *clp;
4474         struct nfscldeleg *dp;
4475         struct nfsclowner *owp;
4476         struct nfscllockowner *lp;
4477         struct nfsmount *nmp;
4478         struct ucred *cred;
4479         struct nfsnode *np;
4480         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4481
4482         nmp = VFSTONFS(vnode_mount(vp));
4483         np = VTONFS(vp);
4484         NFSLOCKCLSTATE();
4485         /*
4486          * Loop around waiting for:
4487          * - outstanding I/O operations on delegations to complete
4488          * - for a delegation on vp that has state, lock the client and
4489          *   do a recall
4490          * - return delegation with no state
4491          */
4492         while (1) {
4493                 clp = nfscl_findcl(nmp);
4494                 if (clp == NULL) {
4495                         NFSUNLOCKCLSTATE();
4496                         return (retcnt);
4497                 }
4498                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4499                     np->n_fhp->nfh_len);
4500                 if (dp != NULL) {
4501                     /*
4502                      * Wait for outstanding I/O ops to be done.
4503                      */
4504                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4505                         if (igotlock) {
4506                             nfsv4_unlock(&clp->nfsc_lock, 0);
4507                             igotlock = 0;
4508                         }
4509                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4510                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4511                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4512                         continue;
4513                     }
4514                     needsrecall = 0;
4515                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4516                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4517                             needsrecall = 1;
4518                             break;
4519                         }
4520                     }
4521                     if (!needsrecall) {
4522                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4523                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4524                                 needsrecall = 1;
4525                                 break;
4526                             }
4527                         }
4528                     }
4529                     if (needsrecall && !triedrecall) {
4530                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4531                         islept = 0;
4532                         while (!igotlock) {
4533                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4534                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4535                             if (islept)
4536                                 break;
4537                         }
4538                         if (islept)
4539                             continue;
4540                         NFSUNLOCKCLSTATE();
4541                         cred = newnfs_getcred();
4542                         newnfs_copycred(&dp->nfsdl_cred, cred);
4543                         nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4544                         NFSFREECRED(cred);
4545                         triedrecall = 1;
4546                         NFSLOCKCLSTATE();
4547                         nfsv4_unlock(&clp->nfsc_lock, 0);
4548                         igotlock = 0;
4549                         continue;
4550                     }
4551                     *stp = dp->nfsdl_stateid;
4552                     retcnt = 1;
4553                     nfscl_cleandeleg(dp);
4554                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4555                 }
4556                 if (igotlock)
4557                     nfsv4_unlock(&clp->nfsc_lock, 0);
4558                 NFSUNLOCKCLSTATE();
4559                 return (retcnt);
4560         }
4561 }
4562
4563 /*
4564  * Look for associated delegation(s) that should be DelegReturned.
4565  */
4566 int
4567 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4568     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4569 {
4570         struct nfsclclient *clp;
4571         struct nfscldeleg *dp;
4572         struct nfsclowner *owp;
4573         struct nfscllockowner *lp;
4574         struct nfsmount *nmp;
4575         struct ucred *cred;
4576         struct nfsnode *np;
4577         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4578
4579         nmp = VFSTONFS(vnode_mount(fvp));
4580         *gotfdp = 0;
4581         *gottdp = 0;
4582         NFSLOCKCLSTATE();
4583         /*
4584          * Loop around waiting for:
4585          * - outstanding I/O operations on delegations to complete
4586          * - for a delegation on fvp that has state, lock the client and
4587          *   do a recall
4588          * - return delegation(s) with no state.
4589          */
4590         while (1) {
4591                 clp = nfscl_findcl(nmp);
4592                 if (clp == NULL) {
4593                         NFSUNLOCKCLSTATE();
4594                         return (retcnt);
4595                 }
4596                 np = VTONFS(fvp);
4597                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4598                     np->n_fhp->nfh_len);
4599                 if (dp != NULL && *gotfdp == 0) {
4600                     /*
4601                      * Wait for outstanding I/O ops to be done.
4602                      */
4603                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4604                         if (igotlock) {
4605                             nfsv4_unlock(&clp->nfsc_lock, 0);
4606                             igotlock = 0;
4607                         }
4608                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4609                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4610                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4611                         continue;
4612                     }
4613                     needsrecall = 0;
4614                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4615                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4616                             needsrecall = 1;
4617                             break;
4618                         }
4619                     }
4620                     if (!needsrecall) {
4621                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4622                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4623                                 needsrecall = 1;
4624                                 break;
4625                             }
4626                         }
4627                     }
4628                     if (needsrecall && !triedrecall) {
4629                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4630                         islept = 0;
4631                         while (!igotlock) {
4632                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4633                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4634                             if (islept)
4635                                 break;
4636                         }
4637                         if (islept)
4638                             continue;
4639                         NFSUNLOCKCLSTATE();
4640                         cred = newnfs_getcred();
4641                         newnfs_copycred(&dp->nfsdl_cred, cred);
4642                         nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4643                         NFSFREECRED(cred);
4644                         triedrecall = 1;
4645                         NFSLOCKCLSTATE();
4646                         nfsv4_unlock(&clp->nfsc_lock, 0);
4647                         igotlock = 0;
4648                         continue;
4649                     }
4650                     *fstp = dp->nfsdl_stateid;
4651                     retcnt++;
4652                     *gotfdp = 1;
4653                     nfscl_cleandeleg(dp);
4654                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4655                 }
4656                 if (igotlock) {
4657                     nfsv4_unlock(&clp->nfsc_lock, 0);
4658                     igotlock = 0;
4659                 }
4660                 if (tvp != NULL) {
4661                     np = VTONFS(tvp);
4662                     dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4663                         np->n_fhp->nfh_len);
4664                     if (dp != NULL && *gottdp == 0) {
4665                         /*
4666                          * Wait for outstanding I/O ops to be done.
4667                          */
4668                         if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4669                             dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4670                             (void) nfsmsleep(&dp->nfsdl_rwlock,
4671                                 NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4672                             continue;
4673                         }
4674                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4675                             if (!LIST_EMPTY(&owp->nfsow_open)) {
4676                                 NFSUNLOCKCLSTATE();
4677                                 return (retcnt);
4678                             }
4679                         }
4680                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4681                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4682                                 NFSUNLOCKCLSTATE();
4683                                 return (retcnt);
4684                             }
4685                         }
4686                         *tstp = dp->nfsdl_stateid;
4687                         retcnt++;
4688                         *gottdp = 1;
4689                         nfscl_cleandeleg(dp);
4690                         nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4691                     }
4692                 }
4693                 NFSUNLOCKCLSTATE();
4694                 return (retcnt);
4695         }
4696 }
4697
4698 /*
4699  * Get a reference on the clientid associated with the mount point.
4700  * Return 1 if success, 0 otherwise.
4701  */
4702 int
4703 nfscl_getref(struct nfsmount *nmp)
4704 {
4705         struct nfsclclient *clp;
4706
4707         NFSLOCKCLSTATE();
4708         clp = nfscl_findcl(nmp);
4709         if (clp == NULL) {
4710                 NFSUNLOCKCLSTATE();
4711                 return (0);
4712         }
4713         nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4714         NFSUNLOCKCLSTATE();
4715         return (1);
4716 }
4717
4718 /*
4719  * Release a reference on a clientid acquired with the above call.
4720  */
4721 void
4722 nfscl_relref(struct nfsmount *nmp)
4723 {
4724         struct nfsclclient *clp;
4725
4726         NFSLOCKCLSTATE();
4727         clp = nfscl_findcl(nmp);
4728         if (clp == NULL) {
4729                 NFSUNLOCKCLSTATE();
4730                 return;
4731         }
4732         nfsv4_relref(&clp->nfsc_lock);
4733         NFSUNLOCKCLSTATE();
4734 }
4735
4736 /*
4737  * Save the size attribute in the delegation, since the nfsnode
4738  * is going away.
4739  */
4740 void
4741 nfscl_reclaimnode(vnode_t vp)
4742 {
4743         struct nfsclclient *clp;
4744         struct nfscldeleg *dp;
4745         struct nfsnode *np = VTONFS(vp);
4746         struct nfsmount *nmp;
4747
4748         nmp = VFSTONFS(vnode_mount(vp));
4749         if (!NFSHASNFSV4(nmp))
4750                 return;
4751         NFSLOCKCLSTATE();
4752         clp = nfscl_findcl(nmp);
4753         if (clp == NULL) {
4754                 NFSUNLOCKCLSTATE();
4755                 return;
4756         }
4757         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4758         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4759                 dp->nfsdl_size = np->n_size;
4760         NFSUNLOCKCLSTATE();
4761 }
4762
4763 /*
4764  * Get the saved size attribute in the delegation, since it is a
4765  * newly allocated nfsnode.
4766  */
4767 void
4768 nfscl_newnode(vnode_t vp)
4769 {
4770         struct nfsclclient *clp;
4771         struct nfscldeleg *dp;
4772         struct nfsnode *np = VTONFS(vp);
4773         struct nfsmount *nmp;
4774
4775         nmp = VFSTONFS(vnode_mount(vp));
4776         if (!NFSHASNFSV4(nmp))
4777                 return;
4778         NFSLOCKCLSTATE();
4779         clp = nfscl_findcl(nmp);
4780         if (clp == NULL) {
4781                 NFSUNLOCKCLSTATE();
4782                 return;
4783         }
4784         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4785         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4786                 np->n_size = dp->nfsdl_size;
4787         NFSUNLOCKCLSTATE();
4788 }
4789
4790 /*
4791  * If there is a valid write delegation for this file, set the modtime
4792  * to the local clock time.
4793  */
4794 void
4795 nfscl_delegmodtime(vnode_t vp)
4796 {
4797         struct nfsclclient *clp;
4798         struct nfscldeleg *dp;
4799         struct nfsnode *np = VTONFS(vp);
4800         struct nfsmount *nmp;
4801
4802         nmp = VFSTONFS(vnode_mount(vp));
4803         if (!NFSHASNFSV4(nmp))
4804                 return;
4805         NFSLOCKCLSTATE();
4806         clp = nfscl_findcl(nmp);
4807         if (clp == NULL) {
4808                 NFSUNLOCKCLSTATE();
4809                 return;
4810         }
4811         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4812         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4813                 nanotime(&dp->nfsdl_modtime);
4814                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4815         }
4816         NFSUNLOCKCLSTATE();
4817 }
4818
4819 /*
4820  * If there is a valid write delegation for this file with a modtime set,
4821  * put that modtime in mtime.
4822  */
4823 void
4824 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4825 {
4826         struct nfsclclient *clp;
4827         struct nfscldeleg *dp;
4828         struct nfsnode *np = VTONFS(vp);
4829         struct nfsmount *nmp;
4830
4831         nmp = VFSTONFS(vnode_mount(vp));
4832         if (!NFSHASNFSV4(nmp))
4833                 return;
4834         NFSLOCKCLSTATE();
4835         clp = nfscl_findcl(nmp);
4836         if (clp == NULL) {
4837                 NFSUNLOCKCLSTATE();
4838                 return;
4839         }
4840         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4841         if (dp != NULL &&
4842             (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4843             (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4844                 *mtime = dp->nfsdl_modtime;
4845         NFSUNLOCKCLSTATE();
4846 }
4847
4848 static int
4849 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4850 {
4851         short *defaulterrp, *errp;
4852
4853         if (!nd->nd_repstat)
4854                 return (0);
4855         if (nd->nd_procnum == NFSPROC_NOOP)
4856                 return (txdr_unsigned(nd->nd_repstat & 0xffff));
4857         if (nd->nd_repstat == EBADRPC)
4858                 return (txdr_unsigned(NFSERR_BADXDR));
4859         if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4860             nd->nd_repstat == NFSERR_OPILLEGAL)
4861                 return (txdr_unsigned(nd->nd_repstat));
4862         if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4863             minorvers > NFSV4_MINORVERSION) {
4864                 /* NFSv4.n error. */
4865                 return (txdr_unsigned(nd->nd_repstat));
4866         }
4867         if (nd->nd_procnum < NFSV4OP_CBNOPS)
4868                 errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4869         else
4870                 return (txdr_unsigned(nd->nd_repstat));
4871         while (*++errp)
4872                 if (*errp == (short)nd->nd_repstat)
4873                         return (txdr_unsigned(nd->nd_repstat));
4874         return (txdr_unsigned(*defaulterrp));
4875 }
4876
4877 /*
4878  * Called to find/add a layout to a client.
4879  * This function returns the layout with a refcnt (shared lock) upon
4880  * success (returns 0) or with no lock/refcnt on the layout when an
4881  * error is returned.
4882  * If a layout is passed in via lypp, it is locked (exclusively locked).
4883  */
4884 int
4885 nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4886     nfsv4stateid_t *stateidp, int layouttype, int retonclose,
4887     struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
4888     struct ucred *cred, NFSPROC_T *p)
4889 {
4890         struct nfsclclient *clp;
4891         struct nfscllayout *lyp, *tlyp;
4892         struct nfsclflayout *flp;
4893         struct nfsnode *np = VTONFS(vp);
4894         mount_t mp;
4895         int layout_passed_in;
4896
4897         mp = nmp->nm_mountp;
4898         layout_passed_in = 1;
4899         tlyp = NULL;
4900         lyp = *lypp;
4901         if (lyp == NULL) {
4902                 layout_passed_in = 0;
4903                 tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
4904                     M_WAITOK | M_ZERO);
4905         }
4906
4907         NFSLOCKCLSTATE();
4908         clp = nmp->nm_clp;
4909         if (clp == NULL) {
4910                 if (layout_passed_in != 0)
4911                         nfsv4_unlock(&lyp->nfsly_lock, 0);
4912                 NFSUNLOCKCLSTATE();
4913                 if (tlyp != NULL)
4914                         free(tlyp, M_NFSLAYOUT);
4915                 return (EPERM);
4916         }
4917         if (lyp == NULL) {
4918                 /*
4919                  * Although no lyp was passed in, another thread might have
4920                  * allocated one. If one is found, just increment it's ref
4921                  * count and return it.
4922                  */
4923                 lyp = nfscl_findlayout(clp, fhp, fhlen);
4924                 if (lyp == NULL) {
4925                         lyp = tlyp;
4926                         tlyp = NULL;
4927                         lyp->nfsly_stateid.seqid = stateidp->seqid;
4928                         lyp->nfsly_stateid.other[0] = stateidp->other[0];
4929                         lyp->nfsly_stateid.other[1] = stateidp->other[1];
4930                         lyp->nfsly_stateid.other[2] = stateidp->other[2];
4931                         lyp->nfsly_lastbyte = 0;
4932                         LIST_INIT(&lyp->nfsly_flayread);
4933                         LIST_INIT(&lyp->nfsly_flayrw);
4934                         LIST_INIT(&lyp->nfsly_recall);
4935                         lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
4936                         lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
4937                         lyp->nfsly_clp = clp;
4938                         if (layouttype == NFSLAYOUT_FLEXFILE)
4939                                 lyp->nfsly_flags = NFSLY_FLEXFILE;
4940                         else
4941                                 lyp->nfsly_flags = NFSLY_FILES;
4942                         if (retonclose != 0)
4943                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4944                         lyp->nfsly_fhlen = fhlen;
4945                         NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
4946                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4947                         LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
4948                             nfsly_hash);
4949                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4950                         nfscl_layoutcnt++;
4951                 } else {
4952                         if (retonclose != 0)
4953                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4954                         if (stateidp->seqid > lyp->nfsly_stateid.seqid)
4955                                 lyp->nfsly_stateid.seqid = stateidp->seqid;
4956                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
4957                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4958                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4959                 }
4960                 nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
4961                 if (NFSCL_FORCEDISM(mp)) {
4962                         NFSUNLOCKCLSTATE();
4963                         if (tlyp != NULL)
4964                                 free(tlyp, M_NFSLAYOUT);
4965                         return (EPERM);
4966                 }
4967                 *lypp = lyp;
4968         } else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
4969                 lyp->nfsly_stateid.seqid = stateidp->seqid;
4970
4971         /* Merge the new list of File Layouts into the list. */
4972         flp = LIST_FIRST(fhlp);
4973         if (flp != NULL) {
4974                 if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
4975                         nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
4976                 else
4977                         nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
4978         }
4979         if (layout_passed_in != 0)
4980                 nfsv4_unlock(&lyp->nfsly_lock, 1);
4981         NFSUNLOCKCLSTATE();
4982         if (tlyp != NULL)
4983                 free(tlyp, M_NFSLAYOUT);
4984         return (0);
4985 }
4986
4987 /*
4988  * Search for a layout by MDS file handle.
4989  * If one is found, it is returned with a refcnt (shared lock) iff
4990  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
4991  * returned NULL.
4992  */
4993 struct nfscllayout *
4994 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
4995     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
4996 {
4997         struct nfscllayout *lyp;
4998         mount_t mp;
4999         int error, igotlock;
5000
5001         mp = clp->nfsc_nmp->nm_mountp;
5002         *recalledp = 0;
5003         *retflpp = NULL;
5004         NFSLOCKCLSTATE();
5005         lyp = nfscl_findlayout(clp, fhp, fhlen);
5006         if (lyp != NULL) {
5007                 if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5008                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5009                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5010                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5011                         error = nfscl_findlayoutforio(lyp, off,
5012                             NFSV4OPEN_ACCESSREAD, retflpp);
5013                         if (error == 0)
5014                                 nfsv4_getref(&lyp->nfsly_lock, NULL,
5015                                     NFSCLSTATEMUTEXPTR, mp);
5016                         else {
5017                                 do {
5018                                         igotlock = nfsv4_lock(&lyp->nfsly_lock,
5019                                             1, NULL, NFSCLSTATEMUTEXPTR, mp);
5020                                 } while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5021                                 *retflpp = NULL;
5022                         }
5023                         if (NFSCL_FORCEDISM(mp)) {
5024                                 lyp = NULL;
5025                                 *recalledp = 1;
5026                         }
5027                 } else {
5028                         lyp = NULL;
5029                         *recalledp = 1;
5030                 }
5031         }
5032         NFSUNLOCKCLSTATE();
5033         return (lyp);
5034 }
5035
5036 /*
5037  * Search for a layout by MDS file handle. If one is found, mark in to be
5038  * recalled, if it already marked "return on close".
5039  */
5040 static void
5041 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5042     int fhlen, struct nfsclrecalllayout **recallpp)
5043 {
5044         struct nfscllayout *lyp;
5045         uint32_t iomode;
5046
5047         if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) ||
5048             nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5049             (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5050                 return;
5051         lyp = nfscl_findlayout(clp, fhp, fhlen);
5052         if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5053             NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5054                 iomode = 0;
5055                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5056                         iomode |= NFSLAYOUTIOMODE_READ;
5057                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5058                         iomode |= NFSLAYOUTIOMODE_RW;
5059                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5060                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5061                     *recallpp);
5062                 NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5063                 *recallpp = NULL;
5064         }
5065 }
5066
5067 /*
5068  * Mark the layout to be recalled and with an error.
5069  * Also, disable the dsp from further use.
5070  */
5071 void
5072 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5073     struct nfscllayout *lyp, struct nfsclds *dsp)
5074 {
5075         struct nfsclrecalllayout *recallp;
5076         uint32_t iomode;
5077
5078         printf("DS being disabled, error=%d\n", stat);
5079         /* Set up the return of the layout. */
5080         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5081         iomode = 0;
5082         NFSLOCKCLSTATE();
5083         if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5084                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5085                         iomode |= NFSLAYOUTIOMODE_READ;
5086                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5087                         iomode |= NFSLAYOUTIOMODE_RW;
5088                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5089                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5090                     dp->nfsdi_deviceid, recallp);
5091                 NFSUNLOCKCLSTATE();
5092                 NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5093         } else {
5094                 NFSUNLOCKCLSTATE();
5095                 free(recallp, M_NFSLAYRECALL);
5096         }
5097
5098         /* And shut the TCP connection down. */
5099         nfscl_cancelreqs(dsp);
5100 }
5101
5102 /*
5103  * Cancel all RPCs for this "dsp" by closing the connection.
5104  * Also, mark the session as defunct.
5105  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5106  * cannot be shut down.
5107  */
5108 void
5109 nfscl_cancelreqs(struct nfsclds *dsp)
5110 {
5111         struct __rpc_client *cl;
5112         static int non_event;
5113
5114         NFSLOCKDS(dsp);
5115         if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5116             dsp->nfsclds_sockp != NULL &&
5117             dsp->nfsclds_sockp->nr_client != NULL) {
5118                 dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5119                 cl = dsp->nfsclds_sockp->nr_client;
5120                 dsp->nfsclds_sess.nfsess_defunct = 1;
5121                 NFSUNLOCKDS(dsp);
5122                 CLNT_CLOSE(cl);
5123                 /*
5124                  * This 1sec sleep is done to reduce the number of reconnect
5125                  * attempts made on the DS while it has failed.
5126                  */
5127                 tsleep(&non_event, PVFS, "ndscls", hz);
5128                 return;
5129         }
5130         NFSUNLOCKDS(dsp);
5131 }
5132
5133 /*
5134  * Dereference a layout.
5135  */
5136 void
5137 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5138 {
5139
5140         NFSLOCKCLSTATE();
5141         if (exclocked != 0)
5142                 nfsv4_unlock(&lyp->nfsly_lock, 0);
5143         else
5144                 nfsv4_relref(&lyp->nfsly_lock);
5145         NFSUNLOCKCLSTATE();
5146 }
5147
5148 /*
5149  * Search for a devinfo by deviceid. If one is found, return it after
5150  * acquiring a reference count on it.
5151  */
5152 struct nfscldevinfo *
5153 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5154     struct nfscldevinfo *dip)
5155 {
5156
5157         NFSLOCKCLSTATE();
5158         if (dip == NULL)
5159                 dip = nfscl_finddevinfo(clp, deviceid);
5160         if (dip != NULL)
5161                 dip->nfsdi_refcnt++;
5162         NFSUNLOCKCLSTATE();
5163         return (dip);
5164 }
5165
5166 /*
5167  * Dereference a devinfo structure.
5168  */
5169 static void
5170 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5171 {
5172
5173         dip->nfsdi_refcnt--;
5174         if (dip->nfsdi_refcnt == 0)
5175                 wakeup(&dip->nfsdi_refcnt);
5176 }
5177
/*
 * Drop one reference on a devinfo structure, acquiring and releasing the
 * client state lock around the release.
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5189
5190 /*
5191  * Find a layout for this file handle. Return NULL upon failure.
5192  */
5193 static struct nfscllayout *
5194 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5195 {
5196         struct nfscllayout *lyp;
5197
5198         LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5199                 if (lyp->nfsly_fhlen == fhlen &&
5200                     !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5201                         break;
5202         return (lyp);
5203 }
5204
5205 /*
5206  * Find a devinfo for this deviceid. Return NULL upon failure.
5207  */
5208 static struct nfscldevinfo *
5209 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5210 {
5211         struct nfscldevinfo *dip;
5212
5213         LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5214                 if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5215                     == 0)
5216                         break;
5217         return (dip);
5218 }
5219
5220 /*
5221  * Merge the new file layout list into the main one, maintaining it in
5222  * increasing offset order.
5223  */
5224 static void
5225 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5226     struct nfsclflayouthead *newfhlp)
5227 {
5228         struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5229
5230         flp = LIST_FIRST(fhlp);
5231         prevflp = NULL;
5232         LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5233                 while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5234                         prevflp = flp;
5235                         flp = LIST_NEXT(flp, nfsfl_list);
5236                 }
5237                 if (prevflp == NULL)
5238                         LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5239                 else
5240                         LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5241                 prevflp = nflp;
5242         }
5243 }
5244
5245 /*
5246  * Add this nfscldevinfo to the client, if it doesn't already exist.
5247  * This function consumes the structure pointed at by dip, if not NULL.
5248  */
5249 int
5250 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5251     struct nfsclflayout *flp)
5252 {
5253         struct nfsclclient *clp;
5254         struct nfscldevinfo *tdip;
5255         uint8_t *dev;
5256
5257         NFSLOCKCLSTATE();
5258         clp = nmp->nm_clp;
5259         if (clp == NULL) {
5260                 NFSUNLOCKCLSTATE();
5261                 if (dip != NULL)
5262                         free(dip, M_NFSDEVINFO);
5263                 return (ENODEV);
5264         }
5265         if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5266                 dev = flp->nfsfl_dev;
5267         else
5268                 dev = flp->nfsfl_ffm[ind].dev;
5269         tdip = nfscl_finddevinfo(clp, dev);
5270         if (tdip != NULL) {
5271                 tdip->nfsdi_layoutrefs++;
5272                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5273                         flp->nfsfl_devp = tdip;
5274                 else
5275                         flp->nfsfl_ffm[ind].devp = tdip;
5276                 nfscl_reldevinfo_locked(tdip);
5277                 NFSUNLOCKCLSTATE();
5278                 if (dip != NULL)
5279                         free(dip, M_NFSDEVINFO);
5280                 return (0);
5281         }
5282         if (dip != NULL) {
5283                 LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5284                 dip->nfsdi_layoutrefs = 1;
5285                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5286                         flp->nfsfl_devp = dip;
5287                 else
5288                         flp->nfsfl_ffm[ind].devp = dip;
5289         }
5290         NFSUNLOCKCLSTATE();
5291         if (dip == NULL)
5292                 return (ENODEV);
5293         return (0);
5294 }
5295
5296 /*
5297  * Free up a layout structure and associated file layout structure(s).
5298  */
5299 void
5300 nfscl_freelayout(struct nfscllayout *layp)
5301 {
5302         struct nfsclflayout *flp, *nflp;
5303         struct nfsclrecalllayout *rp, *nrp;
5304
5305         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5306                 LIST_REMOVE(flp, nfsfl_list);
5307                 nfscl_freeflayout(flp);
5308         }
5309         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5310                 LIST_REMOVE(flp, nfsfl_list);
5311                 nfscl_freeflayout(flp);
5312         }
5313         LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5314                 LIST_REMOVE(rp, nfsrecly_list);
5315                 free(rp, M_NFSLAYRECALL);
5316         }
5317         nfscl_layoutcnt--;
5318         free(layp, M_NFSLAYOUT);
5319 }
5320
5321 /*
5322  * Free up a file layout structure.
5323  */
5324 void
5325 nfscl_freeflayout(struct nfsclflayout *flp)
5326 {
5327         int i, j;
5328
5329         if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5330                 for (i = 0; i < flp->nfsfl_fhcnt; i++)
5331                         free(flp->nfsfl_fh[i], M_NFSFH);
5332                 if (flp->nfsfl_devp != NULL)
5333                         flp->nfsfl_devp->nfsdi_layoutrefs--;
5334         }
5335         if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5336                 for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5337                         for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5338                                 free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5339                         if (flp->nfsfl_ffm[i].devp != NULL)     
5340                                 flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;     
5341                 }
5342         free(flp, M_NFSFLAYOUT);
5343 }
5344
5345 /*
5346  * Free up a file layout devinfo structure.
5347  */
5348 void
5349 nfscl_freedevinfo(struct nfscldevinfo *dip)
5350 {
5351
5352         free(dip, M_NFSDEVINFO);
5353 }
5354
5355 /*
5356  * Mark any layouts that match as recalled.
5357  */
5358 static int
5359 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5360     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5361     char *devid, struct nfsclrecalllayout *recallp)
5362 {
5363         struct nfsclrecalllayout *rp, *orp;
5364
5365         recallp->nfsrecly_recalltype = recalltype;
5366         recallp->nfsrecly_iomode = iomode;
5367         recallp->nfsrecly_stateseqid = stateseqid;
5368         recallp->nfsrecly_off = off;
5369         recallp->nfsrecly_len = len;
5370         recallp->nfsrecly_stat = stat;
5371         recallp->nfsrecly_op = op;
5372         if (devid != NULL)
5373                 NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5374         /*
5375          * Order the list as file returns first, followed by fsid and any
5376          * returns, both in increasing stateseqid order.
5377          * Note that the seqids wrap around, so 1 is after 0xffffffff.
5378          * (I'm not sure this is correct because I find RFC5661 confusing
5379          *  on this, but hopefully it will work ok.)
5380          */
5381         orp = NULL;
5382         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5383                 orp = rp;
5384                 if ((recalltype == NFSLAYOUTRETURN_FILE &&
5385                      (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5386                       nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5387                     (recalltype != NFSLAYOUTRETURN_FILE &&
5388                      rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5389                      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5390                         LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5391                         break;
5392                 }
5393
5394                 /*
5395                  * Put any error return on all the file returns that will
5396                  * preceed this one.
5397                  */
5398                 if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5399                    stat != 0 && rp->nfsrecly_stat == 0) {
5400                         rp->nfsrecly_stat = stat;
5401                         rp->nfsrecly_op = op;
5402                         if (devid != NULL)
5403                                 NFSBCOPY(devid, rp->nfsrecly_devid,
5404                                     NFSX_V4DEVICEID);
5405                 }
5406         }
5407         if (rp == NULL) {
5408                 if (orp == NULL)
5409                         LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5410                             nfsrecly_list);
5411                 else
5412                         LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5413         }
5414         lyp->nfsly_flags |= NFSLY_RECALL;
5415         wakeup(lyp->nfsly_clp);
5416         return (0);
5417 }
5418
/*
 * Order two seqids, allowing for wrap around from 0xffffffff->0.
 * When the two values are 0x7fffffff or more apart, the numerically
 * smaller one is assumed to have wrapped around and therefore to be the
 * later of the two.
 * Return 1 if seqid1 comes before (or is equal to) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
	uint32_t gap;

	if (seqid1 == seqid2)
		return (1);
	if (seqid1 < seqid2) {
		/* In order, unless the gap says seqid2 has wrapped around. */
		gap = seqid2 - seqid1;
		return (gap >= 0x7fffffff ? 0 : 1);
	}
	/* Out of order, unless the gap says seqid1 has wrapped around. */
	gap = seqid1 - seqid2;
	return (gap >= 0x7fffffff ? 1 : 0);
}
5439
5440 /*
5441  * Do a layout return for each of the recalls.
5442  */
5443 static void
5444 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5445     struct ucred *cred, NFSPROC_T *p)
5446 {
5447         struct nfsclrecalllayout *rp;
5448         nfsv4stateid_t stateid;
5449         int layouttype;
5450
5451         NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5452         stateid.seqid = lyp->nfsly_stateid.seqid;
5453         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5454                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5455         else
5456                 layouttype = NFSLAYOUT_FLEXFILE;
5457         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5458                 (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5459                     lyp->nfsly_fhlen, 0, layouttype,
5460                     rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5461                     rp->nfsrecly_off, rp->nfsrecly_len,
5462                     &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5463                     rp->nfsrecly_devid);
5464         }
5465 }
5466
5467 /*
5468  * Do the layout commit for a file layout.
5469  */
5470 static void
5471 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5472     struct ucred *cred, NFSPROC_T *p)
5473 {
5474         struct nfsclflayout *flp;
5475         uint64_t len;
5476         int error, layouttype;
5477
5478         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5479                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5480         else
5481                 layouttype = NFSLAYOUT_FLEXFILE;
5482         LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5483                 if (layouttype == NFSLAYOUT_FLEXFILE &&
5484                     (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5485                         NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5486                         /* If not supported, don't bother doing it. */
5487                         NFSLOCKMNT(nmp);
5488                         nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5489                         NFSUNLOCKMNT(nmp);
5490                         break;
5491                 } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5492                         len = flp->nfsfl_end - flp->nfsfl_off;
5493                         error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5494                             lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5495                             lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5496                             layouttype, cred, p, NULL);
5497                         NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5498                         if (error == NFSERR_NOTSUPP) {
5499                                 /* If not supported, don't bother doing it. */
5500                                 NFSLOCKMNT(nmp);
5501                                 nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5502                                 NFSUNLOCKMNT(nmp);
5503                                 break;
5504                         }
5505                 }
5506         }
5507 }
5508
5509 /*
5510  * Commit all layouts for a file (vnode).
5511  */
5512 int
5513 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5514 {
5515         struct nfsclclient *clp;
5516         struct nfscllayout *lyp;
5517         struct nfsnode *np = VTONFS(vp);
5518         mount_t mp;
5519         struct nfsmount *nmp;
5520
5521         mp = vnode_mount(vp);
5522         nmp = VFSTONFS(mp);
5523         if (NFSHASNOLAYOUTCOMMIT(nmp))
5524                 return (0);
5525         NFSLOCKCLSTATE();
5526         clp = nmp->nm_clp;
5527         if (clp == NULL) {
5528                 NFSUNLOCKCLSTATE();
5529                 return (EPERM);
5530         }
5531         lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5532         if (lyp == NULL) {
5533                 NFSUNLOCKCLSTATE();
5534                 return (EPERM);
5535         }
5536         nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5537         if (NFSCL_FORCEDISM(mp)) {
5538                 NFSUNLOCKCLSTATE();
5539                 return (EPERM);
5540         }
5541 tryagain:
5542         if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5543                 lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5544                 NFSUNLOCKCLSTATE();
5545                 NFSCL_DEBUG(4, "do layoutcommit2\n");
5546                 nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5547                 NFSLOCKCLSTATE();
5548                 goto tryagain;
5549         }
5550         nfsv4_relref(&lyp->nfsly_lock);
5551         NFSUNLOCKCLSTATE();
5552         return (0);
5553 }
5554